diff options
Diffstat (limited to 'third_party/aom/test')
208 files changed, 76067 insertions, 0 deletions
diff --git a/third_party/aom/test/accounting_test.cc b/third_party/aom/test/accounting_test.cc new file mode 100644 index 0000000000..033499d13b --- /dev/null +++ b/third_party/aom/test/accounting_test.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "aom/aom_integer.h" +#include "aom_dsp/bitreader.h" +#include "aom_dsp/bitwriter.h" + +using libaom_test::ACMRandom; + +TEST(AV1, TestAccounting) { + const int kBufferSize = 10000; + const int kSymbols = 1024; + aom_writer bw; + uint8_t bw_buffer[kBufferSize]; + aom_start_encode(&bw, bw_buffer); + for (int i = 0; i < kSymbols; i++) { + aom_write(&bw, 0, 32); + aom_write(&bw, 0, 32); + aom_write(&bw, 0, 32); + } + GTEST_ASSERT_GE(aom_stop_encode(&bw), 0); + aom_reader br; + aom_reader_init(&br, bw_buffer, bw.pos); + + Accounting accounting; + aom_accounting_init(&accounting); + br.accounting = &accounting; + for (int i = 0; i < kSymbols; i++) { + aom_read(&br, 32, "A"); + } + // Consecutive symbols that are the same are coalesced. + GTEST_ASSERT_EQ(accounting.syms.num_syms, 1); + GTEST_ASSERT_EQ(accounting.syms.syms[0].samples, (unsigned int)kSymbols); + + aom_accounting_reset(&accounting); + GTEST_ASSERT_EQ(accounting.syms.num_syms, 0); + + // Should record 2 * kSymbols accounting symbols. 
+ aom_reader_init(&br, bw_buffer, bw.pos); + br.accounting = &accounting; + for (int i = 0; i < kSymbols; i++) { + aom_read(&br, 32, "A"); + aom_read(&br, 32, "B"); + aom_read(&br, 32, "B"); + } + GTEST_ASSERT_EQ(accounting.syms.num_syms, kSymbols * 2); + uint32_t tell_frac = aom_reader_tell_frac(&br); + for (int i = 0; i < accounting.syms.num_syms; i++) { + tell_frac -= accounting.syms.syms[i].bits; + } + GTEST_ASSERT_EQ(tell_frac, 0U); + + GTEST_ASSERT_EQ(aom_accounting_dictionary_lookup(&accounting, "A"), + aom_accounting_dictionary_lookup(&accounting, "A")); + + // Check for collisions. The current aom_accounting_hash function returns + // the same hash code for AB and BA. + GTEST_ASSERT_NE(aom_accounting_dictionary_lookup(&accounting, "AB"), + aom_accounting_dictionary_lookup(&accounting, "BA")); +} diff --git a/third_party/aom/test/acm_random.h b/third_party/aom/test/acm_random.h new file mode 100644 index 0000000000..15e8c9cc2e --- /dev/null +++ b/third_party/aom/test/acm_random.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#ifndef AOM_TEST_ACM_RANDOM_H_ +#define AOM_TEST_ACM_RANDOM_H_ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "aom/aom_integer.h" + +namespace libaom_test { + +class ACMRandom { + public: + ACMRandom() : random_(DeterministicSeed()) {} + + explicit ACMRandom(int seed) : random_(seed) {} + + void Reset(int seed) { random_.Reseed(seed); } + + // Generates a random 31-bit unsigned integer from [0, 2^31). + uint32_t Rand31() { + return random_.Generate(testing::internal::Random::kMaxRange); + } + + uint16_t Rand16() { + const uint32_t value = + random_.Generate(testing::internal::Random::kMaxRange); + // There's a bit more entropy in the upper bits of this implementation. + return (value >> 15) & 0xffff; + } + + int16_t Rand16Signed() { return static_cast<int16_t>(Rand16()); } + + int16_t Rand15() { + const uint32_t value = + random_.Generate(testing::internal::Random::kMaxRange); + // There's a bit more entropy in the upper bits of this implementation. + return (value >> 16) & 0x7fff; + } + + int16_t Rand15Signed() { + // Use 15 bits: values between 16383 (0x3FFF) and -16384 (0xC000). + return static_cast<int16_t>(Rand15()) - (1 << 14); + } + + uint16_t Rand12() { + const uint32_t value = + random_.Generate(testing::internal::Random::kMaxRange); + // There's a bit more entropy in the upper bits of this implementation. + return (value >> 19) & 0xfff; + } + + uint8_t Rand8() { + const uint32_t value = + random_.Generate(testing::internal::Random::kMaxRange); + // There's a bit more entropy in the upper bits of this implementation. + return (value >> 23) & 0xff; + } + + uint8_t Rand8Extremes() { + // Returns a random value near 0 or near 255, to better exercise + // saturation behavior. + const uint8_t r = Rand8(); + return static_cast<uint8_t>((r < 128) ? 
r << 4 : r >> 4); + } + + int PseudoUniform(int range) { return random_.Generate(range); } + + int operator()(int n) { return PseudoUniform(n); } + + static int DeterministicSeed() { return 0xbaba; } + + private: + testing::internal::Random random_; +}; + +} // namespace libaom_test + +#endif // AOM_TEST_ACM_RANDOM_H_ diff --git a/third_party/aom/test/active_map_test.cc b/third_party/aom/test/active_map_test.cc new file mode 100644 index 0000000000..979ee6b8b3 --- /dev/null +++ b/third_party/aom/test/active_map_test.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <climits> +#include <vector> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +class ActiveMapTest + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>, + public ::libaom_test::EncoderTest { + protected: + static const int kWidth = 208; + static const int kHeight = 144; + + ActiveMapTest() : EncoderTest(GET_PARAM(0)) {} + ~ActiveMapTest() override = default; + + void SetUp() override { + InitializeConfig(GET_PARAM(1)); + cpu_used_ = GET_PARAM(2); + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AV1E_SET_ALLOW_WARPED_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_OBMC, 0); + } else if (video->frame() == 3) { + aom_active_map_t map = aom_active_map_t(); + /* clang-format off */ + uint8_t active_map[9 * 13] = { + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, + 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, + }; + /* clang-format on */ + map.cols = (kWidth + 15) / 16; + map.rows = (kHeight + 15) / 16; + ASSERT_EQ(map.cols, 13u); + ASSERT_EQ(map.rows, 9u); + map.active_map = active_map; + encoder->Control(AOME_SET_ACTIVEMAP, &map); + } else if (video->frame() == 15) { + aom_active_map_t map = aom_active_map_t(); + map.cols = (kWidth + 15) / 16; + map.rows = (kHeight + 15) / 16; + map.active_map = nullptr; + encoder->Control(AOME_SET_ACTIVEMAP, &map); + } + } + + void DoTest() { + // 
Validate that this non multiple of 64 wide clip encodes + cfg_.g_lag_in_frames = 0; + cfg_.rc_target_bitrate = 400; + cfg_.rc_resize_mode = 0; + cfg_.g_pass = AOM_RC_ONE_PASS; + cfg_.rc_end_usage = AOM_CBR; + cfg_.kf_max_dist = 90000; + ::libaom_test::I420VideoSource video("hantro_odd.yuv", kWidth, kHeight, 30, + 1, 0, 20); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + int cpu_used_; +}; + +TEST_P(ActiveMapTest, Test) { DoTest(); } + +AV1_INSTANTIATE_TEST_SUITE(ActiveMapTest, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Range(5, 9)); + +} // namespace diff --git a/third_party/aom/test/allintra_end_to_end_test.cc b/third_party/aom/test/allintra_end_to_end_test.cc new file mode 100644 index 0000000000..8ec24aa686 --- /dev/null +++ b/third_party/aom/test/allintra_end_to_end_test.cc @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2022, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <memory> +#include <ostream> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" + +namespace { + +const unsigned int kFrames = 20; +const int kBitrate = 500; +typedef struct { + const char *filename; + unsigned int input_bit_depth; + aom_img_fmt fmt; + aom_bit_depth_t bit_depth; + unsigned int profile; +} TestVideoParam; + +std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) { + return os << "TestVideoParam { filename:" << test_arg.filename + << " input_bit_depth:" << test_arg.input_bit_depth + << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth + << " profile:" << test_arg.profile << " }"; +} + +const TestVideoParam kTestVectors[] = { + { "niklas_1280_720_30.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 }, + { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 }, +}; + +// Params: test video, speed (cpu-used), deltaq mode, threads, tile columns, +// enable tx size search. 
+class AllIntraEndToEndTest + : public ::libaom_test::CodecTestWith6Params<TestVideoParam, int, int, int, + int, int>, + public ::libaom_test::EncoderTest { + protected: + AllIntraEndToEndTest() + : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), psnr_(0.0), nframes_(0), + deltaq_mode_(GET_PARAM(3)), threads_(GET_PARAM(4)), + tile_columns_(GET_PARAM(5)), enable_tx_size_search_(GET_PARAM(6)) {} + + ~AllIntraEndToEndTest() override = default; + + void SetUp() override { + InitializeConfig(::libaom_test::kAllIntra); + cfg_.g_threads = threads_; + } + + void BeginPassHook(unsigned int) override { + psnr_ = 0.0; + nframes_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_ROW_MT, 1); + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + encoder->Control(AV1E_SET_TILE_COLUMNS, tile_columns_); + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AV1E_SET_DELTAQ_MODE, deltaq_mode_); + encoder->Control(AV1E_SET_ENABLE_TX_SIZE_SEARCH, enable_tx_size_search_); + } + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + void DoTest() { + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; + cfg_.g_bit_depth = test_video_param_.bit_depth; + init_flags_ = AOM_CODEC_USE_PSNR; + if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH; + + std::unique_ptr<libaom_test::VideoSource> video; + if (is_extension_y4m(test_video_param_.filename)) + video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0, + kFrames)); + else + 
video.reset(new libaom_test::YUVVideoSource(test_video_param_.filename, + test_video_param_.fmt, 352, + 288, 30, 1, 0, kFrames)); + ASSERT_NE(video, nullptr); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + } + + TestVideoParam test_video_param_; + int cpu_used_; + + private: + double psnr_; + unsigned int nframes_; + unsigned int deltaq_mode_; + int threads_; + int tile_columns_; + int enable_tx_size_search_; +}; + +TEST_P(AllIntraEndToEndTest, EndToEndNoFailure) { DoTest(); } + +AV1_INSTANTIATE_TEST_SUITE(AllIntraEndToEndTest, + ::testing::ValuesIn(kTestVectors), + ::testing::Range(5, 9), ::testing::Range(0, 4), + ::testing::Values(1), ::testing::Values(1), + ::testing::Values(0, 1)); + +INSTANTIATE_TEST_SUITE_P( + AV1MultiThreaded, AllIntraEndToEndTest, + ::testing::Combine( + ::testing::Values( + static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)), + ::testing::ValuesIn(kTestVectors), ::testing::Range(5, 9), + ::testing::Range(0, 4), ::testing::Values(6), ::testing::Values(1), + ::testing::Values(0, 1))); + +} // namespace diff --git a/third_party/aom/test/altref_test.cc b/third_party/aom/test/altref_test.cc new file mode 100644 index 0000000000..081123cbe4 --- /dev/null +++ b/third_party/aom/test/altref_test.cc @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +namespace { +typedef struct { + const unsigned int min_kf_dist; + const unsigned int max_kf_dist; + const unsigned int min_gf_interval; + const unsigned int max_gf_interval; + const unsigned int lag_in_frames; + libaom_test::TestMode encoding_mode; +} AltRefTestParams; + +static const AltRefTestParams TestParams[] = { + { 0, 10, 4, 8, 10, ::libaom_test::kOnePassGood }, + { 0, 30, 8, 12, 16, ::libaom_test::kOnePassGood }, + { 30, 30, 12, 16, 25, ::libaom_test::kOnePassGood }, + { 0, 60, 12, 20, 25, ::libaom_test::kOnePassGood }, + { 60, 60, 16, 28, 30, ::libaom_test::kOnePassGood }, + { 0, 100, 16, 32, 35, ::libaom_test::kOnePassGood }, + { 0, 10, 4, 8, 10, ::libaom_test::kTwoPassGood }, + { 0, 30, 8, 12, 16, ::libaom_test::kTwoPassGood }, + { 30, 30, 12, 16, 25, ::libaom_test::kTwoPassGood }, + { 0, 60, 16, 24, 25, ::libaom_test::kTwoPassGood }, + { 60, 60, 20, 28, 30, ::libaom_test::kTwoPassGood }, + { 0, 100, 24, 32, 35, ::libaom_test::kTwoPassGood }, +}; + +std::ostream &operator<<(std::ostream &os, const AltRefTestParams &test_arg) { + return os << "AltRefTestParams { min_kf_dist:" << test_arg.min_kf_dist + << " max_kf_dist:" << test_arg.max_kf_dist + << " min_gf_interval:" << test_arg.min_gf_interval + << " max_gf_interval:" << test_arg.max_gf_interval + << " lag_in_frames:" << test_arg.lag_in_frames + << " encoding_mode:" << test_arg.encoding_mode << " }"; +} + +// This class is used to check the presence of altref frame. 
+class AltRefFramePresenceTestLarge + : public ::libaom_test::CodecTestWith2Params<AltRefTestParams, aom_rc_mode>, + public ::libaom_test::EncoderTest { + protected: + AltRefFramePresenceTestLarge() + : EncoderTest(GET_PARAM(0)), altref_test_params_(GET_PARAM(1)), + rc_end_usage_(GET_PARAM(2)) { + is_arf_frame_present_ = 0; + } + ~AltRefFramePresenceTestLarge() override = default; + + void SetUp() override { + InitializeConfig(altref_test_params_.encoding_mode); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = rc_end_usage_; + cfg_.g_threads = 1; + cfg_.kf_min_dist = altref_test_params_.min_kf_dist; + cfg_.kf_max_dist = altref_test_params_.max_kf_dist; + cfg_.g_lag_in_frames = altref_test_params_.lag_in_frames; + } + + bool DoDecode() const override { return true; } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 5); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AV1E_SET_MIN_GF_INTERVAL, + altref_test_params_.min_gf_interval); + encoder->Control(AV1E_SET_MAX_GF_INTERVAL, + altref_test_params_.max_gf_interval); + } + } + + bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) override { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + if (is_arf_frame_present_ != 1 && AOM_CODEC_OK == res_dec) { + aom_codec_ctx_t *ctx_dec = decoder->GetDecoder(); + AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_ALTREF_PRESENT, + &is_arf_frame_present_); + } + return AOM_CODEC_OK == res_dec; + } + + const AltRefTestParams altref_test_params_; + int is_arf_frame_present_; + aom_rc_mode rc_end_usage_; +}; + +TEST_P(AltRefFramePresenceTestLarge, AltRefFrameEncodePresenceTest) { + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, 100); + 
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(is_arf_frame_present_, 1); +} + +AV1_INSTANTIATE_TEST_SUITE(AltRefFramePresenceTestLarge, + ::testing::ValuesIn(TestParams), + ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ)); + +typedef struct { + const ::libaom_test::TestMode encoding_mode; + const unsigned int min_gf_interval; + const unsigned int max_gf_interval; +} gfIntervalParam; + +const gfIntervalParam gfTestParams[] = { + // single pass + { ::libaom_test::kOnePassGood, 0, 6 }, + { ::libaom_test::kOnePassGood, 0, 8 }, + { ::libaom_test::kOnePassGood, 5, 10 }, + { ::libaom_test::kOnePassGood, 8, 16 }, + { ::libaom_test::kOnePassGood, 16, 16 }, + + // two pass + { ::libaom_test::kTwoPassGood, 0, 6 }, + { ::libaom_test::kTwoPassGood, 0, 8 }, + { ::libaom_test::kTwoPassGood, 5, 10 }, + { ::libaom_test::kTwoPassGood, 8, 16 }, + { ::libaom_test::kTwoPassGood, 16, 32 }, + { ::libaom_test::kTwoPassGood, 20, 32 }, +}; + +// This class is used to test if the gf interval bounds configured by the user +// are respected by the encoder. +class GoldenFrameIntervalTestLarge + : public ::libaom_test::CodecTestWith2Params<gfIntervalParam, aom_rc_mode>, + public ::libaom_test::EncoderTest { + protected: + GoldenFrameIntervalTestLarge() + : EncoderTest(GET_PARAM(0)), gf_interval_param_(GET_PARAM(1)), + rc_end_usage_(GET_PARAM(2)) { + baseline_gf_interval_ = -1; + limit_ = 60; + frame_num_ = 0; + } + ~GoldenFrameIntervalTestLarge() override = default; + + void SetUp() override { + InitializeConfig(gf_interval_param_.encoding_mode); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = rc_end_usage_; + cfg_.g_threads = 1; + // kf_min_dist is equal to kf_max_dist to make sure that there are no scene + // cuts due to which the min_gf_interval may not be respected. 
+ cfg_.kf_min_dist = limit_; + cfg_.kf_max_dist = limit_; + cfg_.g_limit = limit_; + cfg_.g_lag_in_frames = 35; + cfg_.rc_target_bitrate = 1000; + } + + bool DoDecode() const override { return true; } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 5); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AV1E_SET_MIN_GF_INTERVAL, + gf_interval_param_.min_gf_interval); + encoder->Control(AV1E_SET_MAX_GF_INTERVAL, + gf_interval_param_.max_gf_interval); + } + if (frame_num_ > 0) { + encoder->Control(AV1E_GET_BASELINE_GF_INTERVAL, &baseline_gf_interval_); + ASSERT_LE(baseline_gf_interval_, + (int)gf_interval_param_.max_gf_interval + 1); + if ((frame_num_ + (int)gf_interval_param_.min_gf_interval) <= limit_) { + ASSERT_GE(baseline_gf_interval_, + (int)gf_interval_param_.min_gf_interval); + } + } + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + (void)pkt; + ++frame_num_; + } + + const gfIntervalParam gf_interval_param_; + int baseline_gf_interval_; + int limit_; + int frame_num_; + aom_rc_mode rc_end_usage_; +}; + +TEST_P(GoldenFrameIntervalTestLarge, GoldenFrameIntervalTest) { + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, limit_); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +AV1_INSTANTIATE_TEST_SUITE(GoldenFrameIntervalTestLarge, + ::testing::ValuesIn(gfTestParams), + ::testing::Values(AOM_Q, AOM_VBR, AOM_CQ, AOM_CBR)); + +} // namespace diff --git a/third_party/aom/test/aom_image_test.cc b/third_party/aom/test/aom_image_test.cc new file mode 100644 index 0000000000..ad48e73e3d --- /dev/null +++ b/third_party/aom/test/aom_image_test.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "aom/aom_image.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +TEST(AomImageTest, AomImgWrapInvalidAlign) { + const int kWidth = 128; + const int kHeight = 128; + unsigned char buf[kWidth * kHeight * 3]; + + aom_image_t img; + // Set img_data and img_data_owner to junk values. aom_img_wrap() should + // not read these values on failure. + img.img_data = (unsigned char *)""; + img.img_data_owner = 1; + + aom_img_fmt_t format = AOM_IMG_FMT_I444; + // 'align' must be a power of 2 but is not. This causes the aom_img_wrap() + // call to fail. The test verifies we do not read the junk values in 'img'. + unsigned int align = 31; + EXPECT_EQ(aom_img_wrap(&img, format, kWidth, kHeight, align, buf), nullptr); +} + +TEST(AomImageTest, AomImgSetRectOverflow) { + const int kWidth = 128; + const int kHeight = 128; + unsigned char buf[kWidth * kHeight * 3]; + + aom_image_t img; + aom_img_fmt_t format = AOM_IMG_FMT_I444; + unsigned int align = 32; + EXPECT_EQ(aom_img_wrap(&img, format, kWidth, kHeight, align, buf), &img); + + EXPECT_EQ(aom_img_set_rect(&img, 0, 0, kWidth, kHeight, 0), 0); + // This would result in overflow because -1 is cast to UINT_MAX. 
+ EXPECT_NE(aom_img_set_rect(&img, static_cast<unsigned int>(-1), + static_cast<unsigned int>(-1), kWidth, kHeight, 0), + 0); +} + +TEST(AomImageTest, AomImgAllocNv12) { + const int kWidth = 128; + const int kHeight = 128; + + aom_image_t img; + aom_img_fmt_t format = AOM_IMG_FMT_NV12; + unsigned int align = 32; + EXPECT_NE(aom_img_alloc(&img, format, kWidth, kHeight, align), nullptr); + EXPECT_EQ(img.stride[AOM_PLANE_U], img.stride[AOM_PLANE_Y]); + EXPECT_EQ(img.stride[AOM_PLANE_V], 0); + EXPECT_EQ(img.planes[AOM_PLANE_V], nullptr); + aom_img_free(&img); +} diff --git a/third_party/aom/test/aom_integer_test.cc b/third_party/aom/test/aom_integer_test.cc new file mode 100644 index 0000000000..fcbbfb4d48 --- /dev/null +++ b/third_party/aom/test/aom_integer_test.cc @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "aom/aom_integer.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { +const uint64_t kMaximumLeb128CodedSize = 8; +const uint8_t kLeb128PadByte = 0x80; // Binary: 10000000 +const uint64_t kMaximumLeb128Value = UINT32_MAX; +const uint32_t kSizeTestNumValues = 6; +const uint32_t kSizeTestExpectedSizes[kSizeTestNumValues] = { + 1, 1, 2, 3, 4, 5 +}; +const uint64_t kSizeTestInputs[kSizeTestNumValues] = { 0, 0x7f, + 0x3fff, 0x1fffff, + 0xffffff, 0x10000000 }; + +const uint8_t kOutOfRangeLeb128Value[5] = { 0x80, 0x80, 0x80, 0x80, + 0x10 }; // UINT32_MAX + 1 +} // namespace + +TEST(AomLeb128, DecodeTest) { + const size_t num_leb128_bytes = 3; + const uint8_t leb128_bytes[num_leb128_bytes] = { 0xE5, 0x8E, 0x26 }; + const uint64_t expected_value = 0x98765; // 624485 + const size_t expected_length = 3; + uint64_t value = ~0ULL; // make sure value is cleared by the function + size_t length; + ASSERT_EQ( + aom_uleb_decode(&leb128_bytes[0], num_leb128_bytes, &value, &length), 0); + ASSERT_EQ(expected_value, value); + ASSERT_EQ(expected_length, length); + + // Make sure the decoder stops on the last marked LEB128 byte. 
+ aom_uleb_decode(&leb128_bytes[0], num_leb128_bytes + 1, &value, &length); + ASSERT_EQ(expected_value, value); + ASSERT_EQ(expected_length, length); +} + +TEST(AomLeb128, EncodeTest) { + const uint32_t test_value = 0x98765; // 624485 + const uint8_t expected_bytes[3] = { 0xE5, 0x8E, 0x26 }; + const size_t kWriteBufferSize = 4; + uint8_t write_buffer[kWriteBufferSize] = { 0 }; + size_t bytes_written = 0; + ASSERT_EQ(aom_uleb_encode(test_value, kWriteBufferSize, &write_buffer[0], + &bytes_written), + 0); + ASSERT_EQ(bytes_written, 3u); + for (size_t i = 0; i < bytes_written; ++i) { + ASSERT_EQ(write_buffer[i], expected_bytes[i]); + } +} + +TEST(AomLeb128, EncodeDecodeTest) { + const uint32_t value = 0x98765; // 624485 + const size_t kWriteBufferSize = 4; + uint8_t write_buffer[kWriteBufferSize] = { 0 }; + size_t bytes_written = 0; + ASSERT_EQ(aom_uleb_encode(value, kWriteBufferSize, &write_buffer[0], + &bytes_written), + 0); + ASSERT_EQ(bytes_written, 3u); + uint64_t decoded_value; + size_t decoded_length; + aom_uleb_decode(&write_buffer[0], bytes_written, &decoded_value, + &decoded_length); + ASSERT_EQ(value, decoded_value); + ASSERT_EQ(bytes_written, decoded_length); +} + +TEST(AomLeb128, FixedSizeEncodeTest) { + const uint32_t test_value = 0x123; + const uint8_t expected_bytes[4] = { 0xa3, 0x82, 0x80, 0x00 }; + const size_t kWriteBufferSize = 4; + uint8_t write_buffer[kWriteBufferSize] = { 0 }; + size_t bytes_written = 0; + ASSERT_EQ(0, aom_uleb_encode_fixed_size(test_value, kWriteBufferSize, + kWriteBufferSize, &write_buffer[0], + &bytes_written)); + ASSERT_EQ(kWriteBufferSize, bytes_written); + for (size_t i = 0; i < bytes_written; ++i) { + ASSERT_EQ(write_buffer[i], expected_bytes[i]); + } +} + +TEST(AomLeb128, FixedSizeEncodeDecodeTest) { + const uint32_t value = 0x1; + const size_t kWriteBufferSize = 4; + uint8_t write_buffer[kWriteBufferSize] = { 0 }; + size_t bytes_written = 0; + ASSERT_EQ( + aom_uleb_encode_fixed_size(value, kWriteBufferSize, 
kWriteBufferSize, + &write_buffer[0], &bytes_written), + 0); + ASSERT_EQ(bytes_written, 4u); + uint64_t decoded_value; + size_t decoded_length; + aom_uleb_decode(&write_buffer[0], bytes_written, &decoded_value, + &decoded_length); + ASSERT_EQ(value, decoded_value); + ASSERT_EQ(bytes_written, decoded_length); +} + +TEST(AomLeb128, SizeTest) { + for (size_t i = 0; i < kSizeTestNumValues; ++i) { + ASSERT_EQ(kSizeTestExpectedSizes[i], + aom_uleb_size_in_bytes(kSizeTestInputs[i])); + } +} + +TEST(AomLeb128, DecodeFailTest) { + // Input buffer containing what would be a valid 9 byte LEB128 encoded + // unsigned integer. + const uint8_t kAllPadBytesBuffer[kMaximumLeb128CodedSize + 1] = { + kLeb128PadByte, kLeb128PadByte, kLeb128PadByte, + kLeb128PadByte, kLeb128PadByte, kLeb128PadByte, + kLeb128PadByte, kLeb128PadByte, 0 + }; + uint64_t decoded_value; + + // Test that decode fails when result would be valid 9 byte integer. + ASSERT_EQ(aom_uleb_decode(&kAllPadBytesBuffer[0], kMaximumLeb128CodedSize + 1, + &decoded_value, nullptr), + -1); + + // Test that encoded value missing terminator byte within available buffer + // range causes decode error. + ASSERT_EQ(aom_uleb_decode(&kAllPadBytesBuffer[0], kMaximumLeb128CodedSize, + &decoded_value, nullptr), + -1); + + // Test that LEB128 input that decodes to a value larger than 32-bits fails. 
+ size_t value_size = 0; + ASSERT_EQ(aom_uleb_decode(&kOutOfRangeLeb128Value[0], + sizeof(kOutOfRangeLeb128Value), &decoded_value, + &value_size), + -1); +} + +TEST(AomLeb128, EncodeFailTest) { + const size_t kWriteBufferSize = 4; + const uint32_t kValidTestValue = 1; + uint8_t write_buffer[kWriteBufferSize] = { 0 }; + size_t coded_size = 0; + ASSERT_EQ( + aom_uleb_encode(kValidTestValue, kWriteBufferSize, nullptr, &coded_size), + -1); + ASSERT_EQ(aom_uleb_encode(kValidTestValue, kWriteBufferSize, &write_buffer[0], + nullptr), + -1); + + const uint32_t kValueOutOfRangeForBuffer = 0xFFFFFFFF; + ASSERT_EQ(aom_uleb_encode(kValueOutOfRangeForBuffer, kWriteBufferSize, + &write_buffer[0], &coded_size), + -1); + + const uint64_t kValueOutOfRange = kMaximumLeb128Value + 1; + ASSERT_EQ(aom_uleb_encode(kValueOutOfRange, kWriteBufferSize, + &write_buffer[0], &coded_size), + -1); + + const size_t kPadSizeOutOfRange = 5; + ASSERT_EQ(aom_uleb_encode_fixed_size(kValidTestValue, kWriteBufferSize, + kPadSizeOutOfRange, &write_buffer[0], + &coded_size), + -1); +} diff --git a/third_party/aom/test/aom_mem_test.cc b/third_party/aom/test/aom_mem_test.cc new file mode 100644 index 0000000000..849ba64435 --- /dev/null +++ b/third_party/aom/test/aom_mem_test.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "aom_mem/aom_mem.h" + +#include <cstdio> +#include <cstddef> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +TEST(AomMemTest, Overflow) { + // Allocations are aligned > 1 so SIZE_MAX should always fail. + ASSERT_EQ(aom_malloc(SIZE_MAX), nullptr); + ASSERT_EQ(aom_calloc(1, SIZE_MAX), nullptr); + ASSERT_EQ(aom_calloc(32, SIZE_MAX / 32), nullptr); + ASSERT_EQ(aom_calloc(SIZE_MAX, SIZE_MAX), nullptr); + ASSERT_EQ(aom_memalign(1, SIZE_MAX), nullptr); + ASSERT_EQ(aom_memalign(64, SIZE_MAX), nullptr); + ASSERT_EQ(aom_memalign(64, SIZE_MAX - 64), nullptr); + ASSERT_EQ(aom_memalign(64, SIZE_MAX - 64 - sizeof(size_t) + 2), nullptr); +} + +TEST(AomMemTest, NullParams) { + ASSERT_EQ(aom_memset16(nullptr, 0, 0), nullptr); + aom_free(nullptr); +} diff --git a/third_party/aom/test/aomcx_set_ref.sh b/third_party/aom/test/aomcx_set_ref.sh new file mode 100755 index 0000000000..237e2f319c --- /dev/null +++ b/third_party/aom/test/aomcx_set_ref.sh @@ -0,0 +1,58 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests the libaom aom_cx_set_ref example. To add new tests to this +## file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to aom_cx_set_ref_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required. +aom_cx_set_ref_verify_environment() { + if [ ! 
-e "${YUV_RAW_INPUT}" ]; then + echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH." + return 1 + fi +} + +# Runs aom_cx_set_ref and updates the reference frame before encoding frame 90. +# $1 is the codec name, which aom_cx_set_ref does not support at present: It's +# currently used only to name the output file. +# TODO(tomfinegan): Pass the codec param once the example is updated to support +# AV1. +aom_set_ref() { + local encoder="${LIBAOM_BIN_PATH}/aom_cx_set_ref${AOM_TEST_EXE_SUFFIX}" + local codec="$1" + local output_file="${AOM_TEST_OUTPUT_DIR}/aom_cx_set_ref_${codec}.ivf" + local ref_frame_num=4 + local limit=10 + if [ ! -x "${encoder}" ]; then + elog "${encoder} does not exist or is not executable." + return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ + "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \ + "${ref_frame_num}" "${limit}" ${devnull} || return 1 + + [ -e "${output_file}" ] || return 1 +} + +aom_cx_set_ref_av1() { + if [ "$(av1_encode_available)" = "yes" ]; then + aom_set_ref av1 || return 1 + fi +} + +aom_cx_set_ref_tests="aom_cx_set_ref_av1" + +run_tests aom_cx_set_ref_verify_environment "${aom_cx_set_ref_tests}" + diff --git a/third_party/aom/test/aomdec.sh b/third_party/aom/test/aomdec.sh new file mode 100755 index 0000000000..e9738a8e89 --- /dev/null +++ b/third_party/aom/test/aomdec.sh @@ -0,0 +1,219 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests aomdec. 
## To add new tests to this file, do the following:
##   1. Write a shell function (this is your test).
##   2. Add the function to aomdec_tests (on a new line).
##
# Quote the path so that a checkout located in a directory containing
# whitespace can still source the shared helpers.
. "$(dirname "$0")/tools_common.sh"

AV1_MONOCHROME_B10="${LIBAOM_TEST_DATA_PATH}/av1-1-b10-24-monochrome.ivf"
AV1_MONOCHROME_B8="${LIBAOM_TEST_DATA_PATH}/av1-1-b8-24-monochrome.ivf"

# Environment check: Make sure input is available.
# Returns 1 when encoding is unavailable and any of the pre-encoded AV1 files
# is missing, or when the aomdec tool cannot be located.
aomdec_verify_environment() {
  if [ "$(av1_encode_available)" != "yes" ] ; then
    # Without an encoder the test files cannot be generated on demand, so
    # every one of them has to be present already.
    if [ ! -e "${AV1_IVF_FILE}" ] || \
       [ ! -e "${AV1_OBU_ANNEXB_FILE}" ] || \
       [ ! -e "${AV1_OBU_SEC5_FILE}" ] || \
       [ ! -e "${AV1_WEBM_FILE}" ]; then
      elog "Libaom test data must exist before running this test script when " \
           " encoding is disabled. "
      return 1
    fi
  fi
  # NOTE(review): a missing monochrome clip is only logged here; the function
  # does not return 1 for it. Confirm whether that is intentional.
  if [ ! -e "${AV1_MONOCHROME_B10}" ] || [ ! -e "${AV1_MONOCHROME_B8}" ]; then
    elog "Libaom test data must exist before running this test script."
  fi
  if [ -z "$(aom_tool_path aomdec)" ]; then
    elog "aomdec not found. It must exist in LIBAOM_BIN_PATH or its parent."
    return 1
  fi
}

# Wrapper function for running aomdec with pipe input. Requires that
# LIBAOM_BIN_PATH points to the directory containing aomdec. $1 is used as the
# input file path and shifted away. All remaining parameters are passed through
# to aomdec.
# Returns 1 when the input file does not exist.
aomdec_pipe() {
  local input="$1"
  shift
  if [ ! -e "${input}" ]; then
    elog "Input file ($input) missing in aomdec_pipe()"
    return 1
  fi
  # Stream the file that was just validated. The previous code read "${file}",
  # which is not defined in this function and only resolved through a caller's
  # local variable of the same name.
  cat "${input}" | aomdec - "$@" ${devnull}
}


# Wrapper function for running aomdec. Requires that LIBAOM_BIN_PATH points to
# the directory containing aomdec. $1 is used as the input file path and
# shifted away. All remaining parameters are passed through to aomdec.
+aomdec() { + local decoder="$(aom_tool_path aomdec)" + local input="$1" + shift + eval "${AOM_TEST_PREFIX}" "${decoder}" "$input" "$@" ${devnull} +} + +aomdec_can_decode_av1() { + if [ "$(av1_decode_available)" = "yes" ]; then + echo yes + fi +} + +aomdec_av1_ivf() { + if [ "$(aomdec_can_decode_av1)" = "yes" ]; then + local file="${AV1_IVF_FILE}" + if [ ! -e "${file}" ]; then + encode_yuv_raw_input_av1 "${file}" --ivf || return 1 + fi + aomdec "${AV1_IVF_FILE}" --summary --noblit + fi +} + +aomdec_av1_ivf_error_resilient() { + if [ "$(aomdec_can_decode_av1)" = "yes" ]; then + local file="av1.error-resilient.ivf" + if [ ! -e "${file}" ]; then + encode_yuv_raw_input_av1 "${file}" --ivf --error-resilient=1 || return 1 + fi + aomdec "${file}" --summary --noblit + fi +} + +ivf_multithread() { + local row_mt="$1" + if [ "$(aomdec_can_decode_av1)" = "yes" ]; then + local file="${AV1_IVF_FILE}" + if [ ! -e "${file}" ]; then + encode_yuv_raw_input_av1 "${file}" --ivf || return 1 + fi + for threads in 2 3 4 5 6 7 8; do + aomdec "${file}" --summary --noblit --threads=$threads --row-mt=$row_mt \ + || return 1 + done + fi +} + +aomdec_av1_ivf_multithread() { + ivf_multithread 0 # --row-mt=0 +} + +aomdec_av1_ivf_multithread_row_mt() { + ivf_multithread 1 # --row-mt=1 +} + +aomdec_aom_ivf_pipe_input() { + if [ "$(aomdec_can_decode_av1)" = "yes" ]; then + local file="${AV1_IVF_FILE}" + if [ ! -e "${file}" ]; then + encode_yuv_raw_input_av1 "${file}" --ivf || return 1 + fi + aomdec_pipe "${AV1_IVF_FILE}" --summary --noblit + fi +} + +aomdec_av1_obu_annexb() { + if [ "$(aomdec_can_decode_av1)" = "yes" ]; then + local file="${AV1_OBU_ANNEXB_FILE}" + if [ ! -e "${file}" ]; then + encode_yuv_raw_input_av1 "${file}" --obu --annexb=1 || return 1 + fi + aomdec "${file}" --summary --noblit --annexb + fi +} + +aomdec_av1_obu_annexb_pipe_input() { + if [ "$(aomdec_can_decode_av1)" = "yes" ]; then + local file="${AV1_OBU_ANNEXB_FILE}" + if [ ! 
-e "${file}" ]; then + encode_yuv_raw_input_av1 "${file}" --obu --annexb=1 || return 1 + fi + aomdec_pipe "${file}" --summary --noblit --annexb + fi +} + +aomdec_av1_obu_section5() { + if [ "$(aomdec_can_decode_av1)" = "yes" ]; then + local file="${AV1_OBU_SEC5_FILE}" + if [ ! -e "${file}" ]; then + encode_yuv_raw_input_av1 "${file}" --obu || return 1 + fi + aomdec "${file}" --summary --noblit + fi +} + +aomdec_av1_obu_section5_pipe_input() { + if [ "$(aomdec_can_decode_av1)" = "yes" ]; then + local file="${AV1_OBU_SEC5_FILE}" + if [ ! -e "${file}" ]; then + encode_yuv_raw_input_av1 "${file}" --obu || return 1 + fi + aomdec_pipe "${file}" --summary --noblit + fi +} + +aomdec_av1_webm() { + if [ "$(aomdec_can_decode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local file="${AV1_WEBM_FILE}" + if [ ! -e "${file}" ]; then + encode_yuv_raw_input_av1 "${file}" || return 1 + fi + aomdec "${AV1_WEBM_FILE}" --summary --noblit + fi +} + +aomdec_av1_monochrome_yuv() { + if [ "$(aomdec_can_decode_av1)" = "yes" ]; then + local input="$1" + local basename="$(basename "${input}")" + local output="${basename}-%wx%h-%4.i420" + local md5file="${AOM_TEST_OUTPUT_DIR}/${basename}.md5" + local decoder="$(aom_tool_path aomdec)" + # Note aomdec() is not used to avoid ${devnull} which may also redirect + # stdout. + eval "${AOM_TEST_PREFIX}" "${decoder}" --md5 --i420 \ + -o "${output}" "${input}" ">" "${md5file}" 2>&1 || return 1 + diff "${1}.md5" "${md5file}" + fi +} + +aomdec_av1_monochrome_yuv_8bit() { + aomdec_av1_monochrome_yuv "${AV1_MONOCHROME_B8}" +} + +aomdec_av1_monochrome_yuv_10bit() { + aomdec_av1_monochrome_yuv "${AV1_MONOCHROME_B10}" +} + +aomdec_tests="aomdec_av1_ivf + aomdec_av1_ivf_multithread + aomdec_av1_ivf_multithread_row_mt + aomdec_aom_ivf_pipe_input + aomdec_av1_monochrome_yuv_8bit" + +if [ ! 
"$(realtime_only_build)" = "yes" ]; then + aomdec_tests="${aomdec_tests} + aomdec_av1_ivf_error_resilient + aomdec_av1_obu_annexb + aomdec_av1_obu_section5 + aomdec_av1_obu_annexb_pipe_input + aomdec_av1_obu_section5_pipe_input + aomdec_av1_webm" +fi + +if [ "$(highbitdepth_available)" = "yes" ]; then + aomdec_tests="${aomdec_tests} + aomdec_av1_monochrome_yuv_10bit" +fi + +run_tests aomdec_verify_environment "${aomdec_tests}" diff --git a/third_party/aom/test/aomenc.sh b/third_party/aom/test/aomenc.sh new file mode 100755 index 0000000000..0bb9fba3b8 --- /dev/null +++ b/third_party/aom/test/aomenc.sh @@ -0,0 +1,306 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests aomenc using hantro_collage_w352h288.yuv as input. To add +## new tests to this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to aomenc_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: Make sure input is available. +aomenc_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBAOM_TEST_DATA_PATH." + return 1 + fi + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + if [ ! -e "${Y4M_NOSQ_PAR_INPUT}" ]; then + elog "The file ${Y4M_NOSQ_PAR_INPUT##*/} must exist in" + elog "LIBAOM_TEST_DATA_PATH." + return 1 + fi + fi + if [ -z "$(aom_tool_path aomenc)" ]; then + elog "aomenc not found. It must exist in LIBAOM_BIN_PATH or its parent." 
+ return 1 + fi +} + +aomenc_can_encode_av1() { + if [ "$(av1_encode_available)" = "yes" ]; then + echo yes + fi +} + +# Utilities that echo aomenc input file parameters. +y4m_input_non_square_par() { + echo ""${Y4M_NOSQ_PAR_INPUT}"" +} + +y4m_input_720p() { + echo ""${Y4M_720P_INPUT}"" +} + +# Wrapper function for running aomenc with pipe input. Requires that +# LIBAOM_BIN_PATH points to the directory containing aomenc. $1 is used as the +# input file path and shifted away. All remaining parameters are passed through +# to aomenc. +aomenc_pipe() { + local encoder="$(aom_tool_path aomenc)" + local input="$1" + shift + cat "${input}" | eval "${AOM_TEST_PREFIX}" "${encoder}" - \ + --test-decode=fatal \ + "$@" ${devnull} +} + +# Wrapper function for running aomenc. Requires that LIBAOM_BIN_PATH points to +# the directory containing aomenc. $1 one is used as the input file path and +# shifted away. All remaining parameters are passed through to aomenc. +aomenc() { + local encoder="$(aom_tool_path aomenc)" + local input="$1" + shift + eval "${AOM_TEST_PREFIX}" "${encoder}" "${input}" \ + --test-decode=fatal \ + "$@" ${devnull} +} + +aomenc_av1_ivf() { + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + local output="${AV1_IVF_FILE}" + if [ -e "${AV1_IVF_FILE}" ]; then + output="${AOM_TEST_OUTPUT_DIR}/av1_test.ivf" + fi + aomenc $(yuv_raw_input) \ + $(aomenc_encode_test_fast_params) \ + --ivf \ + --output="${output}" || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_ivf_rt() { + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + local output="${AV1_IVF_FILE}" + if [ -e "${AV1_IVF_FILE}" ]; then + output="${AOM_TEST_OUTPUT_DIR}/av1_test.ivf" + fi + aomenc $(yuv_raw_input) \ + $(aomenc_encode_test_rt_params) \ + --ivf \ + --output="${output}" || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." 
+ return 1 + fi + fi +} + +aomenc_av1_ivf_use_16bit_internal() { + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + local output="${AV1_IVF_FILE}" + if [ -e "${AV1_IVF_FILE}" ]; then + output="${AOM_TEST_OUTPUT_DIR}/av1_test_16bit.ivf" + fi + aomenc $(yuv_raw_input) \ + $(aomenc_encode_test_fast_params) \ + --ivf \ + --use-16bit-internal \ + --output="${output}" || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_obu_annexb() { + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + local output="${AV1_OBU_ANNEXB_FILE}" + if [ -e "${AV1_OBU_ANNEXB_FILE}" ]; then + output="${AOM_TEST_OUTPUT_DIR}/av1_test.annexb.obu" + fi + aomenc $(yuv_raw_input) \ + $(aomenc_encode_test_fast_params) \ + --obu \ + --annexb=1 \ + --output="${output}" || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_obu_section5() { + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + local output="${AV1_OBU_SEC5_FILE}" + if [ -e "${AV1_OBU_SEC5_FILE}" ]; then + output="${AOM_TEST_OUTPUT_DIR}/av1_test.section5.obu" + fi + aomenc $(yuv_raw_input) \ + $(aomenc_encode_test_fast_params) \ + --obu \ + --output="${output}" || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_webm() { + if [ "$(aomenc_can_encode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local output="${AV1_WEBM_FILE}" + if [ -e "${AV1_WEBM_FILE}" ]; then + output="${AOM_TEST_OUTPUT_DIR}/av1_test.webm" + fi + aomenc $(yuv_raw_input) \ + $(aomenc_encode_test_fast_params) \ + --output="${output}" || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." 
+ return 1 + fi + fi +} + +aomenc_av1_webm_1pass() { + if [ "$(aomenc_can_encode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local output="${AOM_TEST_OUTPUT_DIR}/av1_test.webm" + aomenc $(yuv_raw_input) \ + $(aomenc_encode_test_fast_params) \ + --passes=1 \ + --output="${output}" || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_ivf_lossless() { + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + local output="${AOM_TEST_OUTPUT_DIR}/av1_lossless.ivf" + aomenc $(yuv_raw_input) \ + $(aomenc_encode_test_fast_params) \ + --ivf \ + --output="${output}" \ + --lossless=1 || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_ivf_minq0_maxq0() { + if [ "$(aomenc_can_encode_av1)" = "yes" ]; then + local output="${AOM_TEST_OUTPUT_DIR}/av1_lossless_minq0_maxq0.ivf" + aomenc $(yuv_raw_input) \ + $(aomenc_encode_test_fast_params) \ + --ivf \ + --output="${output}" \ + --min-q=0 \ + --max-q=0 || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_webm_lag5_frames10() { + if [ "$(aomenc_can_encode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local lag_total_frames=10 + local lag_frames=5 + local output="${AOM_TEST_OUTPUT_DIR}/av1_lag5_frames10.webm" + aomenc $(yuv_raw_input) \ + $(aomenc_encode_test_fast_params) \ + --limit=${lag_total_frames} \ + --lag-in-frames=${lag_frames} \ + --output="${output}" || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +# TODO(fgalligan): Test that DisplayWidth is different than video width. 
+aomenc_av1_webm_non_square_par() { + if [ "$(aomenc_can_encode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + local output="${AOM_TEST_OUTPUT_DIR}/av1_non_square_par.webm" + aomenc $(y4m_input_non_square_par) \ + $(aomenc_encode_test_fast_params) \ + --output="${output}" || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + fi +} + +aomenc_av1_webm_cdf_update_mode() { + if [ "$(aomenc_can_encode_av1)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + for mode in 0 1 2; do + local output="${AOM_TEST_OUTPUT_DIR}/cdf_mode_${mode}.webm" + aomenc $(yuv_raw_input) \ + $(aomenc_encode_test_fast_params) \ + --cdf-update-mode=${mode} \ + --output="${output}" || return 1 + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + done + fi +} + +if [ "$(realtime_only_build)" = "yes" ]; then + aomenc_tests="aomenc_av1_ivf_rt" +else + aomenc_tests="aomenc_av1_ivf + aomenc_av1_ivf_rt + aomenc_av1_obu_annexb + aomenc_av1_obu_section5 + aomenc_av1_webm + aomenc_av1_webm_1pass + aomenc_av1_ivf_lossless + aomenc_av1_ivf_minq0_maxq0 + aomenc_av1_ivf_use_16bit_internal + aomenc_av1_webm_lag5_frames10 + aomenc_av1_webm_non_square_par + aomenc_av1_webm_cdf_update_mode" +fi + +run_tests aomenc_verify_environment "${aomenc_tests}" diff --git a/third_party/aom/test/aq_segment_test.cc b/third_party/aom/test/aq_segment_test.cc new file mode 100644 index 0000000000..674a883ea2 --- /dev/null +++ b/third_party/aom/test/aq_segment_test.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "config/aom_config.h" + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +const libaom_test::TestMode kTestModeParams[] = +#if CONFIG_REALTIME_ONLY + { ::libaom_test::kRealTime }; +#else + { ::libaom_test::kRealTime, ::libaom_test::kOnePassGood }; +#endif + +class AqSegmentTest + : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int, + int>, + public ::libaom_test::EncoderTest { + protected: + AqSegmentTest() : EncoderTest(GET_PARAM(0)) {} + ~AqSegmentTest() override = default; + + void SetUp() override { + InitializeConfig(GET_PARAM(1)); + set_cpu_used_ = GET_PARAM(2); + aq_mode_ = 0; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AV1E_SET_AQ_MODE, aq_mode_); + encoder->Control(AV1E_SET_DELTAQ_MODE, deltaq_mode_); + encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT, 100); + if (mode_ == ::libaom_test::kRealTime) { + encoder->Control(AV1E_SET_ALLOW_WARPED_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_OBMC, 0); + } + } + } + + void DoTest(int aq_mode) { + aq_mode_ = aq_mode; + deltaq_mode_ = 0; + cfg_.kf_max_dist = 12; + cfg_.rc_min_quantizer = 8; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 6; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_target_bitrate = 300; + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 15); + 
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + int set_cpu_used_; + int aq_mode_; + int deltaq_mode_; +}; + +// Validate that this AQ segmentation mode (1-variance_aq, 2-complexity_aq, +// 3-cyclic_refresh_aq) encodes and decodes without a mismatch. +TEST_P(AqSegmentTest, TestNoMisMatch) { DoTest(GET_PARAM(3)); } + +#if !CONFIG_REALTIME_ONLY +// Validate that this delta q mode +// encodes and decodes without a mismatch. +TEST_P(AqSegmentTest, TestNoMisMatchExtDeltaQ) { + cfg_.rc_end_usage = AOM_CQ; + aq_mode_ = 0; + deltaq_mode_ = 2; + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 15); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} +#endif + +AV1_INSTANTIATE_TEST_SUITE(AqSegmentTest, ::testing::ValuesIn(kTestModeParams), + ::testing::Range(5, 9), ::testing::Range(0, 4)); + +#if !CONFIG_REALTIME_ONLY +class AqSegmentTestLarge : public AqSegmentTest {}; + +TEST_P(AqSegmentTestLarge, TestNoMisMatch) { DoTest(GET_PARAM(3)); } + +AV1_INSTANTIATE_TEST_SUITE(AqSegmentTestLarge, + ::testing::Values(::libaom_test::kOnePassGood), + ::testing::Range(3, 5), ::testing::Range(0, 4)); +#endif +} // namespace diff --git a/third_party/aom/test/arf_freq_test.cc b/third_party/aom/test/arf_freq_test.cc new file mode 100644 index 0000000000..f51444da4d --- /dev/null +++ b/third_party/aom/test/arf_freq_test.cc @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <memory> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" +#include "av1/encoder/ratectrl.h" + +namespace { + +const unsigned int kFrames = 100; +const int kBitrate = 500; + +#define ARF_NOT_SEEN 1000001 +#define ARF_SEEN_ONCE 1000000 + +typedef struct { + const char *filename; + unsigned int width; + unsigned int height; + unsigned int framerate_num; + unsigned int framerate_den; + unsigned int input_bit_depth; + aom_img_fmt fmt; + aom_bit_depth_t bit_depth; + unsigned int profile; +} TestVideoParam; + +typedef struct { + libaom_test::TestMode mode; + int cpu_used; +} TestEncodeParam; + +const TestVideoParam kTestVectors[] = { + // artificially increase framerate to trigger default check + { "hantro_collage_w352h288.yuv", 352, 288, 5000, 1, 8, AOM_IMG_FMT_I420, + AOM_BITS_8, 0 }, + { "hantro_collage_w352h288.yuv", 352, 288, 30, 1, 8, AOM_IMG_FMT_I420, + AOM_BITS_8, 0 }, + { "rush_hour_444.y4m", 352, 288, 30, 1, 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 }, + // Add list of profile 2/3 test videos here ... +}; + +const TestEncodeParam kEncodeVectors[] = { +#if CONFIG_REALTIME_ONLY + { ::libaom_test::kRealTime, 5 }, +#else + { ::libaom_test::kRealTime, 5 }, { ::libaom_test::kOnePassGood, 2 }, + { ::libaom_test::kOnePassGood, 5 }, { ::libaom_test::kTwoPassGood, 1 }, + { ::libaom_test::kTwoPassGood, 2 }, { ::libaom_test::kTwoPassGood, 5 }, +#endif +}; + +const int kMinArfVectors[] = { + // NOTE: 0 refers to the default built-in logic in: + // av1_rc_get_default_min_gf_interval(...) 
+ 0, 4, 8, 12, 15 +}; + +class ArfFreqTestLarge + : public ::libaom_test::CodecTestWith3Params<TestVideoParam, + TestEncodeParam, int>, + public ::libaom_test::EncoderTest { + protected: + ArfFreqTestLarge() + : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)), + test_encode_param_(GET_PARAM(2)), min_arf_requested_(GET_PARAM(3)) {} + + ~ArfFreqTestLarge() override = default; + + void SetUp() override { + InitializeConfig(test_encode_param_.mode); + if (test_encode_param_.mode != ::libaom_test::kRealTime) { + cfg_.g_lag_in_frames = 25; + } else { + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + } + } + + void BeginPassHook(unsigned int) override { + min_run_ = ARF_NOT_SEEN; + run_of_visible_frames_ = 0; + } + + int GetNumFramesInPkt(const aom_codec_cx_pkt_t *pkt) { + const uint8_t *buffer = reinterpret_cast<uint8_t *>(pkt->data.frame.buf); + const uint8_t marker = buffer[pkt->data.frame.sz - 1]; + const int mag = ((marker >> 3) & 3) + 1; + int frames = (marker & 0x7) + 1; + const unsigned int index_sz = 2 + mag * frames; + // Check for superframe or not. + // Assume superframe has only one visible frame, the rest being + // invisible. If superframe index is not found, then there is only + // one frame. 
+ if (!((marker & 0xe0) == 0xc0 && pkt->data.frame.sz >= index_sz && + buffer[pkt->data.frame.sz - index_sz] == marker)) { + frames = 1; + } + return frames; + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return; + const int frames = GetNumFramesInPkt(pkt); + if (frames == 1) { + run_of_visible_frames_++; + } else if (frames == 2) { + if (min_run_ == ARF_NOT_SEEN) { + min_run_ = ARF_SEEN_ONCE; + } else if (min_run_ == ARF_SEEN_ONCE || + run_of_visible_frames_ < min_run_) { + min_run_ = run_of_visible_frames_; + } + run_of_visible_frames_ = 1; + } else { + min_run_ = 0; + run_of_visible_frames_ = 1; + } + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + encoder->Control(AV1E_SET_TILE_COLUMNS, 4); + encoder->Control(AOME_SET_CPUUSED, test_encode_param_.cpu_used); + encoder->Control(AV1E_SET_MIN_GF_INTERVAL, min_arf_requested_); + if (test_encode_param_.mode != ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + } + + int GetMinVisibleRun() const { return min_run_; } + + int GetMinArfDistanceRequested() const { + if (min_arf_requested_) + return min_arf_requested_; + else + return av1_rc_get_default_min_gf_interval( + test_video_param_.width, test_video_param_.height, + (double)test_video_param_.framerate_num / + test_video_param_.framerate_den); + } + + TestVideoParam test_video_param_; + TestEncodeParam test_encode_param_; + + private: + int min_arf_requested_; + int min_run_; + int run_of_visible_frames_; +}; + +TEST_P(ArfFreqTestLarge, MinArfFreqTest) { + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; 
+ cfg_.g_bit_depth = test_video_param_.bit_depth; + init_flags_ = AOM_CODEC_USE_PSNR; + if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH; + + std::unique_ptr<libaom_test::VideoSource> video; + if (is_extension_y4m(test_video_param_.filename)) { + video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0, + kFrames)); + } else { + video.reset(new libaom_test::YUVVideoSource( + test_video_param_.filename, test_video_param_.fmt, + test_video_param_.width, test_video_param_.height, + test_video_param_.framerate_num, test_video_param_.framerate_den, 0, + kFrames)); + } + ASSERT_NE(video, nullptr); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + const int min_run = GetMinVisibleRun(); + const int min_arf_dist_requested = GetMinArfDistanceRequested(); + if (min_run != ARF_NOT_SEEN && min_run != ARF_SEEN_ONCE) { + const int min_arf_dist = min_run + 1; + EXPECT_GE(min_arf_dist, min_arf_dist_requested); + } +} + +#if CONFIG_AV1_ENCODER +// TODO(angiebird): 25-29 fail in high bitdepth mode. +// TODO(zoeliu): This ArfFreqTest does not work with BWDREF_FRAME, as +// BWDREF_FRAME is also a non-show frame, and the minimum run between two +// consecutive BWDREF_FRAME's may vary between 1 and any arbitrary positive +// number as long as it does not exceed the gf_group interval. +INSTANTIATE_TEST_SUITE_P( + DISABLED_AV1, ArfFreqTestLarge, + ::testing::Combine( + ::testing::Values( + static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)), + ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors), + ::testing::ValuesIn(kMinArfVectors))); +#endif // CONFIG_AV1_ENCODER +} // namespace diff --git a/third_party/aom/test/av1_c_vs_simd_encode.sh b/third_party/aom/test/av1_c_vs_simd_encode.sh new file mode 100755 index 0000000000..296204d118 --- /dev/null +++ b/third_party/aom/test/av1_c_vs_simd_encode.sh @@ -0,0 +1,566 @@ +#!/bin/sh +## Copyright (c) 2023, Alliance for Open Media. 
## All rights reserved
##
## This source code is subject to the terms of the BSD 2 Clause License and
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
## was not distributed with this source code in the LICENSE file, you can
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##
## This script checks the bit exactness between C and SIMD
## implementations of AV1 encoder.
##
# Quote the path so that a checkout located in a directory containing
# whitespace can still source the shared helpers.
. "$(dirname "$0")/tools_common.sh"

PRESETS="good rt"
LOWBD_CIF_CLIP="yuv_raw_input"
LOWBD_480p_CLIP="yuv_480p_raw_input"
LOWBD_720p_CLIP="y4m_720p_input"
HIGHBD_CLIP="y4m_360p_10bit_input"
SC_CLIP="y4m_screen_input"
OUT_FILE_SUFFIX=".ivf"
SCRIPT_DIR=$(dirname "$0")
# Parent of the directory holding this script. "&&" keeps a failed cd from
# silently reporting the current working directory instead.
LIBAOM_SOURCE_DIR=$(cd "${SCRIPT_DIR}/.." && pwd)

# Clips used in test.
YUV_RAW_INPUT="${LIBAOM_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
YUV_480P_RAW_INPUT="${LIBAOM_TEST_DATA_PATH}/niklas_640_480_30.yuv"
Y4M_360P_10BIT_INPUT="${LIBAOM_TEST_DATA_PATH}/crowd_run_360p_10_150f.y4m"
Y4M_720P_INPUT="${LIBAOM_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
Y4M_SCREEN_INPUT="${LIBAOM_TEST_DATA_PATH}/wikipedia_420_360p_60f.y4m"

# Number of frames to test.
AV1_ENCODE_C_VS_SIMD_TEST_FRAME_LIMIT=35

# Create a temporary directory for output files. The root is the first
# non-empty value of TMPDIR, then TEMPDIR, falling back to /tmp.
AOM_TEST_TEMP_ROOT="${TMPDIR:-${TEMPDIR:-/tmp}}"

# $$ (the shell's PID) goes into the directory name.
AOM_TEST_OUTPUT_DIR="${AOM_TEST_TEMP_ROOT}/av1_test_$$"

if ! mkdir -p "${AOM_TEST_OUTPUT_DIR}" || \
   [ ! -d "${AOM_TEST_OUTPUT_DIR}" ]; then
  echo "${0##*/}: Cannot create output directory, giving up."
  echo "${0##*/}: AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}"
  exit 1
fi

# Writes all of its arguments to stderr.
elog() {
  echo "$@" 1>&2
}

# Echoes path to $1 when it's executable and exists in ${AOM_TEST_OUTPUT_DIR},
# or an empty string.
# Caller is responsible for testing the string once the
# function returns.
# $1 is the build target and $2 is the encoding preset; together they select
# ${AOM_TEST_OUTPUT_DIR}/build_target_<target>/aomenc_<preset>.
av1_enc_tool_path() {
  local build_target="$1"
  local enc_preset="$2"
  local candidate="${AOM_TEST_OUTPUT_DIR}/build_target_${build_target}/aomenc_${enc_preset}"

  if [ -x "${candidate}" ]; then
    echo "${candidate}"
  else
    echo ""
  fi
}

# Environment check: Make sure input and source directories are available.
# Returns 1 (after logging) on the first missing clip or when the libaom
# source directory is not a directory.
av1_c_vs_simd_enc_verify_environment () {
  local clip
  # Every clip reports the same message, so they are checked in one loop.
  for clip in "${YUV_RAW_INPUT}" \
              "${Y4M_360P_10BIT_INPUT}" \
              "${YUV_480P_RAW_INPUT}" \
              "${Y4M_720P_INPUT}" \
              "${Y4M_SCREEN_INPUT}"; do
    if [ ! -e "${clip}" ]; then
      elog "libaom test data must exist in LIBAOM_TEST_DATA_PATH."
      return 1
    fi
  done

  if [ ! -d "$LIBAOM_SOURCE_DIR" ]; then
    elog "LIBAOM_SOURCE_DIR does not exist."
    return 1
  fi
}

# This is not needed since tools_common.sh does the same cleanup.
# Keep the code here for our reference.
# cleanup() {
#   rm -rf ${AOM_TEST_OUTPUT_DIR}
# }

# Echo AOM_SIMD_CAPS_MASK for different instruction set architecture.
# Each helper below prints the AOM_SIMD_CAPS_MASK value for the instruction
# set it is named after.
avx512f() {
  printf '%s\n' "0x1FF"
}

avx2() {
  printf '%s\n' "0x0FF"
}

avx() {
  printf '%s\n' "0x07F"
}

sse4_1() {
  printf '%s\n' "0x03F"
}

ssse3() {
  printf '%s\n' "0x01F"
}

sse3() {
  printf '%s\n' "0x00F"
}

sse2() {
  printf '%s\n' "0x007"
}

# Prints the bitrate for a clip/preset pair.
# $1 is the clip helper name (e.g. yuv_raw_input) and $2 is the preset
# ("good" or "rt"). An unknown clip or preset is reported through elog and
# nothing is printed on stdout.
get_bitrates() {
  local clip=$1
  local mode=$2

  case "${mode}" in
    good)
      case "${clip}" in
        yuv_raw_input)        echo "300" ;;
        yuv_480p_raw_input)   echo "500" ;;
        y4m_720p_input)       echo "1000" ;;
        y4m_screen_input)     echo "500" ;;
        # The 10-bit clip uses the same bitrate for both presets.
        y4m_360p_10bit_input) echo "500" ;;
        *)                    elog "Invalid content" ;;
      esac
      ;;
    rt)
      case "${clip}" in
        yuv_raw_input)        echo "200" ;;
        yuv_480p_raw_input)   echo "300" ;;
        y4m_720p_input)       echo "600" ;;
        y4m_screen_input)     echo "300" ;;
        y4m_360p_10bit_input) echo "500" ;;
        *)                    elog "Invalid content" ;;
      esac
      ;;
    *)
      elog "invalid preset"
      ;;
  esac
}

# Echo clip details to be used as input to aomenc.
# Each clip descriptor echoes the input file followed by the aomenc options
# that describe it. The result is meant to be word-split by the caller, so the
# options are kept on one logical line like the av1_encode_*_params helpers.
yuv_raw_input() {
  echo "${YUV_RAW_INPUT} \
  --width=352 \
  --height=288 \
  --bit-depth=8"
}

y4m_360p_10bit_input() {
  echo "${Y4M_360P_10BIT_INPUT} \
  --bit-depth=10"
}

yuv_480p_raw_input() {
  echo "${YUV_480P_RAW_INPUT} \
  --width=640 \
  --height=480 \
  --bit-depth=8"
}

y4m_720p_input() {
  echo "${Y4M_720P_INPUT} \
  --bit-depth=8"
}

y4m_screen_input() {
  echo "${Y4M_SCREEN_INPUT} \
  --tune-content=screen \
  --enable-palette=1 \
  --bit-depth=8"
}

# Returns 0 when /proc/cpuinfo mentions the instruction set named by $1, and 1
# otherwise (including when /proc/cpuinfo cannot be read).
# The match is a plain substring search, so "avx" is also satisfied by "avx2".
# NOTE(review): Linux reports SSE3 as "pni"; "sse3" only matches here through
# the "ssse3" flag. Confirm before adding sse3 to the tested variants.
has_x86_isa_extn() {
  # Keep the argument local so it does not leak into the caller's scope.
  local instruction_set=$1
  if ! grep -q "$instruction_set" /proc/cpuinfo; then
    # This instruction set is not supported.
    return 1
  fi
}

# Echo good encode params for use with AV1 encoder.
av1_encode_good_params() {
  echo "--good \
  --ivf \
  --profile=0 \
  --static-thresh=0 \
  --threads=1 \
  --tile-columns=0 \
  --tile-rows=0 \
  --verbose \
  --end-usage=vbr \
  --kf-max-dist=160 \
  --kf-min-dist=0 \
  --max-q=63 \
  --min-q=0 \
  --overshoot-pct=100 \
  --undershoot-pct=100 \
  --passes=2 \
  --arnr-maxframes=7 \
  --arnr-strength=5 \
  --auto-alt-ref=1 \
  --drop-frame=0 \
  --frame-parallel=0 \
  --lag-in-frames=35 \
  --maxsection-pct=2000 \
  --minsection-pct=0 \
  --sharpness=0"
}

# Echo realtime encode params for use with AV1 encoder.
av1_encode_rt_params() {
  echo "--rt \
  --ivf \
  --profile=0 \
  --static-thresh=0 \
  --threads=1 \
  --tile-columns=0 \
  --tile-rows=0 \
  --verbose \
  --end-usage=cbr \
  --kf-max-dist=90000 \
  --max-q=58 \
  --min-q=2 \
  --overshoot-pct=50 \
  --undershoot-pct=50 \
  --passes=1 \
  --aq-mode=3 \
  --buf-initial-sz=500 \
  --buf-optimal-sz=600 \
  --buf-sz=1000 \
  --coeff-cost-upd-freq=3 \
  --dv-cost-upd-freq=3 \
  --mode-cost-upd-freq=3 \
  --mv-cost-upd-freq=3 \
  --deltaq-mode=0 \
  --enable-global-motion=0 \
  --enable-obmc=0 \
  --enable-order-hint=0 \
  --enable-ref-frame-mvs=0 \
  --enable-tpl-model=0 \
  --enable-warped-motion=0 \
  --lag-in-frames=0 \
  --max-intra-rate=300 \
  --noise-sensitivity=0"
}

# Configures for the given target in AOM_TEST_OUTPUT_DIR/build_target_${target}
# directory.
#   $1: build target name.
#   $2: cmake command line (without the common/preset specific options).
# Builds aomenc once per entry of PRESETS and renames it to aomenc_<preset>.
# Returns 1 on the first failing step. Leaves the shell inside the build
# directory, as the build commands are run from there.
av1_enc_build() {
  local target="$1"
  local cmake_command="$2"
  local tmp_build_dir="${AOM_TEST_OUTPUT_DIR}/build_target_${target}"
  local preset
  local cmake_extra_args

  # Always start from an empty build tree.
  if [ -d "${tmp_build_dir}" ]; then
    rm -rf "${tmp_build_dir}"
  fi

  # Do not build (or later rename files) in whatever directory we happen to
  # be in if the build directory cannot be created or entered.
  if ! mkdir -p "${tmp_build_dir}" || ! cd "${tmp_build_dir}"; then
    elog "cannot enter build directory ${tmp_build_dir}"
    return 1
  fi

  local cmake_common_args="-DCONFIG_EXCLUDE_SIMD_MISMATCH=1 \
           -DCMAKE_BUILD_TYPE=Release \
           -DENABLE_CCACHE=1 \
           '-DCMAKE_C_FLAGS_RELEASE=-O3 -g' \
           '-DCMAKE_CXX_FLAGS_RELEASE=-O3 -g' \
           -DENABLE_DOCS=0 -DENABLE_TESTS=0 -DENABLE_TOOLS=0"

  for preset in $PRESETS; do
    echo "Building target[${preset} encoding]: ${target}"
    if [ "${preset}" = "good" ]; then
      cmake_extra_args="-DCONFIG_AV1_HIGHBITDEPTH=1"
    elif [ "${preset}" = "rt" ]; then
      cmake_extra_args="-DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0"
    else
      elog "Invalid preset"
      return 1
    fi
    # eval is required: cmake_common_args carries single-quoted options that
    # contain spaces, and devnull may hold a redirection.
    if ! eval "$cmake_command" "${cmake_common_args}" "${cmake_extra_args}" \
      ${devnull}; then
      elog "cmake failure"
      return 1
    fi
    if ! eval make -j$(nproc) aomenc ${devnull}; then
      elog "build failure"
      return 1
    fi

    if ! mv aomenc "aomenc_${preset}"; then
      elog "failed to rename aomenc to aomenc_${preset}"
      return 1
    fi
  done
  echo "Done building target: ${target}"
}

# Compares the bitstream produced by the C-only ("generic") encoder with the
# one produced for ${target}.
#   $1: target  $2: cpu-used  $3: clip name  $4: bitrate (kbps)  $5: preset
# Returns 1 and reports through elog when the two files differ.
compare_enc_output() {
  local target=$1
  local cpu=$2
  local clip=$3
  local bitrate=$4
  local preset=$5
  local suffix="${clip}-${preset}-${bitrate}kbps-cpu${cpu}${OUT_FILE_SUFFIX}"
  if ! diff -q "${AOM_TEST_OUTPUT_DIR}/Out-generic-${suffix}" \
       "${AOM_TEST_OUTPUT_DIR}/Out-${target}-${suffix}"; then
    elog "C vs ${target} encode mismatches for ${clip}, at ${bitrate} kbps, speed ${cpu}, ${preset} preset"
    return 1
  fi
}

# Encodes every clip/bitrate/speed combination for one target and preset and,
# for non-generic targets, compares the output with the generic encode.
#   $1: encoder command  $2: arch (x86, x86_64, arm64)  $3: target  $4: preset
# Returns 1 on a missing encoder, an invalid preset or the first mismatch.
av1_enc_test() {
  local encoder="$1"
  local arch="$2"
  local target="$3"
  local preset="$4"
  local min_cpu_used=""
  local max_cpu_used=""
  local test_params=""
  local test_clips=""
  local test_bitrates=""
  local cpu
  local clip
  local bitrate

  if [ -z "$(av1_enc_tool_path "${target}" "${preset}")" ]; then
    elog "aomenc_${preset} not found. It must exist in ${AOM_TEST_OUTPUT_DIR}/build_target_${target} path"
    return 1
  fi

  if [ "${preset}" = "good" ]; then
    if [ "${arch}" = "x86_64" ]; then
      min_cpu_used=0
      max_cpu_used=6
    elif [ "${arch}" = "x86" ]; then
      min_cpu_used=2
      max_cpu_used=3
    fi
    test_params=av1_encode_good_params
  elif [ "${preset}" = "rt" ]; then
    min_cpu_used=5
    max_cpu_used=11
    test_params=av1_encode_rt_params
  else
    elog "Invalid preset"
    return 1
  fi

  # No speed range is defined for some arch/preset pairs (arm64 with the good
  # preset). Without a range "seq" fails and nothing is encoded, so make the
  # skip explicit instead of relying on that error.
  # NOTE(review): confirm whether arm64 should get a good-preset speed range.
  if [ -z "${min_cpu_used}" ] || [ -z "${max_cpu_used}" ]; then
    echo "Skipping ${preset} preset for ${arch}: no cpu-used range is defined"
    return 0
  fi

  for cpu in $(seq $min_cpu_used $max_cpu_used); do
    if [ "${preset}" = "good" ]; then
      if [ "${arch}" = "x86_64" ]; then
        # Larger clips are used at faster speeds.
        if [ "${cpu}" -lt 2 ]; then
          test_clips="${LOWBD_CIF_CLIP} ${HIGHBD_CLIP}"
        elif [ "${cpu}" -lt 5 ]; then
          test_clips="${LOWBD_480p_CLIP} ${HIGHBD_CLIP}"
        else
          test_clips="${LOWBD_720p_CLIP} ${HIGHBD_CLIP}"
        fi
      elif [ "${arch}" = "x86" ]; then
        test_clips="${LOWBD_CIF_CLIP} ${HIGHBD_CLIP}"
      elif [ "${arch}" = "arm64" ]; then
        test_clips="${LOWBD_CIF_CLIP} ${HIGHBD_CLIP}"
      fi
    elif [ "${preset}" = "rt" ]; then
      if [ "${cpu}" -lt 8 ]; then
        test_clips="${LOWBD_CIF_CLIP} ${SC_CLIP}"
      else
        test_clips="${LOWBD_480p_CLIP} ${SC_CLIP}"
      fi
    else
      elog "Invalid preset"
      return 1
    fi

    for clip in ${test_clips}; do
      test_bitrates=$(get_bitrates ${clip} ${preset})
      for bitrate in ${test_bitrates}; do
        # ${clip} and ${test_params} name functions that echo the options.
        eval "${encoder}" $($clip) $($test_params) \
          "--limit=${AV1_ENCODE_C_VS_SIMD_TEST_FRAME_LIMIT}" \
          "--cpu-used=${cpu}" "--target-bitrate=${bitrate}" "-o" \
          ${AOM_TEST_OUTPUT_DIR}/Out-${target}-"${clip}"-${preset}-${bitrate}kbps-cpu${cpu}${OUT_FILE_SUFFIX} \
          ${devnull}

        if [ "${target}" != "generic" ]; then
          if ! compare_enc_output ${target} $cpu ${clip} $bitrate ${preset}; then
            # Found a mismatch
            return 1
          fi
        fi
      done
    done
  done
}

# Builds the C-only encoder for the given arch ($1: x86 or x86_64) and produces
# the reference encodes. Returns 1 when the build or the encoder lookup fails.
av1_test_generic() {
  local arch=$1
  local target="generic"
  local cmake_command=""
  local preset
  local encoder
  if [ "${arch}" = "x86_64" ]; then
    cmake_command="cmake $LIBAOM_SOURCE_DIR -DAOM_TARGET_CPU=${target}"
  elif [ "${arch}" = "x86" ]; then
    # As AV1 encode output differs for x86 32-bit and 64-bit platforms
    # (BUG=aomedia:3479), the x86 32-bit C-only build is generated separately.
    # The cmake command line option -DENABLE_MMX=0 flag disables all SIMD
    # optimizations, and generates a C-only binary.
    cmake_command="cmake $LIBAOM_SOURCE_DIR -DENABLE_MMX=0 \
      -DCMAKE_TOOLCHAIN_FILE=${LIBAOM_SOURCE_DIR}/build/cmake/toolchains/i686-linux-gcc.cmake"
  fi

  echo "Build for: Generic ${arch}"
  if ! av1_enc_build "${target}" "${cmake_command}"; then
    return 1
  fi

  for preset in $PRESETS; do
    encoder="$(av1_enc_tool_path "${target}" "${preset}")"
    # Quote the encoder so that an empty path does not shift the arguments.
    if ! av1_enc_test "${encoder}" "${arch}" "${target}" "${preset}"; then
      return 1
    fi
  done
}

# This function encodes AV1 bitstream by enabling SSE2, SSE3, SSSE3, SSE4_1, AVX, AVX2 as there are
# no functions with MMX, SSE and AVX512 specialization.
# The value of environment variable 'AOM_SIMD_CAPS_MASK' controls enabling of different instruction
# set extension optimizations. The value of the flag 'AOM_SIMD_CAPS_MASK' and the corresponding
# instruction set extension optimization enabled are as follows:
# AVX512 AVX2 AVX SSE4_1 SSSE3 SSE3 SSE2 SSE MMX
#   1     1    1    1      1    1    1    1   1  -> 0x1FF -> Enable AVX512 and lower variants
#   0     1    1    1      1    1    1    1   1  -> 0x0FF -> Enable AVX2 and lower variants
#   0     0    1    1      1    1    1    1   1  -> 0x07F -> Enable AVX and lower variants
#   0     0    0    1      1    1    1    1   1  -> 0x03F -> Enable SSE4_1 and lower variants
#   0     0    0    0      1    1    1    1   1  -> 0x01F -> Enable SSSE3 and lower variants
#   0     0    0    0      0    1    1    1   1  -> 0x00F -> Enable SSE3 and lower variants
#   0     0    0    0      0    0    1    1   1  -> 0x007 -> Enable SSE2 and lower variants
#   0     0    0    0      0    0    0    1   1  -> 0x003 -> Enable SSE and lower variants
#   0     0    0    0      0    0    0    0   1  -> 0x001 -> Enable MMX
# NOTE: In x86_64 platform, it is not possible to enable sse/mmx/c using "AOM_SIMD_CAPS_MASK" as
#  all x86_64 platforms implement sse2.
av1_test_x86() {
  local arch=$1
  local target=""
  local cmake_command=""
  local preset
  local encoder
  local isa

  if ! uname -m | grep -q "x86"; then
    elog "Machine architecture is not x86 or x86_64"
    return 0
  fi

  if [ "${arch}" = "x86" ]; then
    target="x86-linux"
    cmake_command="cmake \
    $LIBAOM_SOURCE_DIR \
    -DCMAKE_TOOLCHAIN_FILE=${LIBAOM_SOURCE_DIR}/build/cmake/toolchains/i686-linux-gcc.cmake"
  elif [ "${arch}" = "x86_64" ]; then
    target="x86_64-linux"
    cmake_command="cmake $LIBAOM_SOURCE_DIR"
  fi

  # Available x86 isa variants: "avx2 avx sse4_1 ssse3 sse3 sse2"
  local x86_isa_variants="avx2 sse4_1 sse2"

  echo "Build for x86: ${target}"
  if ! av1_enc_build "${target}" "${cmake_command}"; then
    return 1
  fi

  for preset in $PRESETS; do
    encoder="$(av1_enc_tool_path "${target}" "${preset}")"
    for isa in $x86_isa_variants; do
      # Note that if has_x86_isa_extn returns 1, it is false, and vice versa.
      if ! has_x86_isa_extn $isa; then
        echo "${isa} is not supported in this machine"
        continue
      fi
      # ${isa} names the function that echoes the matching capability mask.
      export AOM_SIMD_CAPS_MASK=$($isa)
      if ! av1_enc_test "${encoder}" "${arch}" "${target}" "${preset}"; then
        # Found a mismatch. Do not leave the mask exported for later commands.
        unset AOM_SIMD_CAPS_MASK
        return 1
      fi
      unset AOM_SIMD_CAPS_MASK
    done
  done
}

# Cross-builds the arm64 encoder and runs it through qemu-aarch64.
# Returns 1 when the build fails or an encode mismatches the generic output.
av1_test_arm() {
  local arch="arm64"
  local target="arm64-linux-gcc"
  local cmake_command="cmake $LIBAOM_SOURCE_DIR \
        -DCMAKE_TOOLCHAIN_FILE=$LIBAOM_SOURCE_DIR/build/cmake/toolchains/${target}.cmake \
        -DCMAKE_C_FLAGS=-Wno-maybe-uninitialized"
  local preset
  local encoder
  echo "Build for arm64: ${target}"
  if ! av1_enc_build "${target}" "${cmake_command}"; then
    return 1
  fi

  for preset in $PRESETS; do
    encoder="$(av1_enc_tool_path "${target}" "${preset}")"
    if ! av1_enc_test "qemu-aarch64 -L /usr/aarch64-linux-gnu ${encoder}" "${arch}" "${target}" "${preset}"; then
      # Found a mismatch
      return 1
    fi
  done
}

# Test entry point: runs the C vs SIMD comparison for x86 (32 bit), x86_64
# (only on x86_64 hosts) and arm64. Returns 1 on the first failing stage.
av1_c_vs_simd_enc_test () {
  # Test x86 (32 bit)
  # x86 requires the i686-linux-gnu toolchain:
  # $ sudo apt-get install g++-i686-linux-gnu
  echo "av1 test for x86 (32 bit): Started."
  # Encode 'C' only. Without the reference encodes every comparison below
  # would fail, so stop here if they could not be produced.
  if ! av1_test_generic "x86"; then
    echo "av1 test for x86 (32 bit): Done, test failed."
    return 1
  fi
  # Encode with SIMD optimizations enabled
  if ! av1_test_x86 "x86"; then
    echo "av1 test for x86 (32 bit): Done, test failed."
    return 1
  else
    echo "av1 test for x86 (32 bit): Done, all tests passed."
  fi

  # Test x86_64 (64 bit)
  if [ "$(uname -m)" = "x86_64" ]; then
    echo "av1 test for x86_64 (64 bit): Started."
    # Encode 'C' only
    if ! av1_test_generic "x86_64"; then
      echo "av1 test for x86_64 (64 bit): Done, test failed."
      return 1
    fi
    # Encode with SIMD optimizations enabled
    if ! av1_test_x86 "x86_64"; then
      echo "av1 test for x86_64 (64 bit): Done, test failed."
      return 1
    else
      echo "av1 test for x86_64 (64 bit): Done, all tests passed."
    fi
  fi

  # Test ARM
  echo "av1_test_arm: Started."
  if ! av1_test_arm; then
    echo "av1 test for arm: Done, test failed."
    return 1
  else
    echo "av1 test for arm: Done, all tests passed."
  fi
}

run_tests av1_c_vs_simd_enc_verify_environment av1_c_vs_simd_enc_test
diff --git a/third_party/aom/test/av1_common_int_test.cc b/third_party/aom/test/av1_common_int_test.cc new file mode 100644 index 0000000000..dde2542e3d --- /dev/null +++ b/third_party/aom/test/av1_common_int_test.cc @@ -0,0 +1,22 @@
/*
 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "av1/common/av1_common_int.h"

// Every transform size must map back to itself when looked up by its
// width and height.
TEST(AV1CommonInt, TestGetTxSize) {
  for (int t = TX_4X4; t < TX_SIZES_ALL; t++) {
    TX_SIZE t2 = get_tx_size(tx_size_wide[t], tx_size_high[t]);
    GTEST_ASSERT_EQ(tx_size_wide[t], tx_size_wide[t2]);
    GTEST_ASSERT_EQ(tx_size_high[t], tx_size_high[t2]);
  }
}
diff --git a/third_party/aom/test/av1_config_test.cc b/third_party/aom/test/av1_config_test.cc new file mode 100644 index 0000000000..3ff816c163 --- /dev/null +++ b/third_party/aom/test/av1_config_test.cc @@ -0,0 +1,164 @@
/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */ +#include <string.h> + +#include "common/av1_config.h" +#include "test/util.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +// +// Input buffers containing exactly one Sequence Header OBU. +// +// Each buffer is named according to the OBU storage format (Annex-B vs Low +// Overhead Bitstream Format) and the type of Sequence Header OBU ("Full" +// Sequence Header OBUs vs Sequence Header OBUs with the +// reduced_still_image_flag set). +// +const uint8_t kAnnexBFullSequenceHeaderObu[] = { 0x0c, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x45, 0x7e, 0x3e, 0xff, + 0xfc, 0xc0, 0x20 }; +const uint8_t kAnnexBReducedStillImageSequenceHeaderObu[] = { + 0x08, 0x08, 0x18, 0x22, 0x2b, 0xf1, 0xfe, 0xc0, 0x20 +}; + +const uint8_t kLobfFullSequenceHeaderObu[] = { 0x0a, 0x0b, 0x00, 0x00, 0x00, + 0x04, 0x45, 0x7e, 0x3e, 0xff, + 0xfc, 0xc0, 0x20 }; + +const uint8_t kLobfReducedStillImageSequenceHeaderObu[] = { 0x0a, 0x07, 0x18, + 0x22, 0x2b, 0xf1, + 0xfe, 0xc0, 0x20 }; + +const uint8_t kAv1cAllZero[] = { 0, 0, 0, 0 }; + +// The size of AV1 config when no configOBUs are present at the end of the +// configuration structure. 
+const size_t kAv1cNoConfigObusSize = 4; + +bool VerifyAv1c(const uint8_t *const obu_buffer, size_t obu_buffer_length, + bool is_annexb) { + Av1Config av1_config; + memset(&av1_config, 0, sizeof(av1_config)); + bool parse_ok = get_av1config_from_obu(obu_buffer, obu_buffer_length, + is_annexb, &av1_config) == 0; + if (parse_ok) { + EXPECT_EQ(1, av1_config.marker); + EXPECT_EQ(1, av1_config.version); + EXPECT_EQ(0, av1_config.seq_profile); + EXPECT_EQ(0, av1_config.seq_level_idx_0); + EXPECT_EQ(0, av1_config.seq_tier_0); + EXPECT_EQ(0, av1_config.high_bitdepth); + EXPECT_EQ(0, av1_config.twelve_bit); + EXPECT_EQ(0, av1_config.monochrome); + EXPECT_EQ(1, av1_config.chroma_subsampling_x); + EXPECT_EQ(1, av1_config.chroma_subsampling_y); + EXPECT_EQ(0, av1_config.chroma_sample_position); + EXPECT_EQ(0, av1_config.initial_presentation_delay_present); + EXPECT_EQ(0, av1_config.initial_presentation_delay_minus_one); + } + return parse_ok && ::testing::Test::HasFailure() == false; +} + +TEST(Av1Config, ObuInvalidInputs) { + Av1Config av1_config; + memset(&av1_config, 0, sizeof(av1_config)); + ASSERT_EQ(-1, get_av1config_from_obu(nullptr, 0, 0, nullptr)); + ASSERT_EQ(-1, get_av1config_from_obu(&kLobfFullSequenceHeaderObu[0], 0, 0, + nullptr)); + ASSERT_EQ(-1, get_av1config_from_obu(&kLobfFullSequenceHeaderObu[0], + sizeof(kLobfFullSequenceHeaderObu), 0, + nullptr)); + ASSERT_EQ(-1, get_av1config_from_obu( + nullptr, sizeof(kLobfFullSequenceHeaderObu), 0, nullptr)); + ASSERT_EQ(-1, get_av1config_from_obu(&kLobfFullSequenceHeaderObu[0], 0, 0, + &av1_config)); +} + +TEST(Av1Config, ReadInvalidInputs) { + Av1Config av1_config; + memset(&av1_config, 0, sizeof(av1_config)); + size_t bytes_read = 0; + ASSERT_EQ(-1, read_av1config(nullptr, 0, nullptr, nullptr)); + ASSERT_EQ(-1, read_av1config(nullptr, 4, nullptr, nullptr)); + ASSERT_EQ(-1, read_av1config(&kAv1cAllZero[0], 0, nullptr, nullptr)); + ASSERT_EQ(-1, read_av1config(&kAv1cAllZero[0], 4, &bytes_read, nullptr)); + 
ASSERT_EQ(-1, read_av1config(nullptr, 4, &bytes_read, &av1_config)); +} + +TEST(Av1Config, WriteInvalidInputs) { + Av1Config av1_config; + memset(&av1_config, 0, sizeof(av1_config)); + size_t bytes_written = 0; + uint8_t av1c_buffer[4] = { 0 }; + ASSERT_EQ(-1, write_av1config(nullptr, 0, nullptr, nullptr)); + ASSERT_EQ(-1, write_av1config(&av1_config, 0, nullptr, nullptr)); + ASSERT_EQ(-1, write_av1config(&av1_config, 0, &bytes_written, nullptr)); + + ASSERT_EQ(-1, + write_av1config(&av1_config, 0, &bytes_written, &av1c_buffer[0])); + ASSERT_EQ(-1, write_av1config(&av1_config, 4, &bytes_written, nullptr)); +} + +TEST(Av1Config, GetAv1ConfigFromLobfObu) { + // Test parsing of a Sequence Header OBU with the reduced_still_picture_header + // unset-- aka a full Sequence Header OBU. + ASSERT_TRUE(VerifyAv1c(kLobfFullSequenceHeaderObu, + sizeof(kLobfFullSequenceHeaderObu), false)); + + // Test parsing of a reduced still image Sequence Header OBU. + ASSERT_TRUE(VerifyAv1c(kLobfReducedStillImageSequenceHeaderObu, + sizeof(kLobfReducedStillImageSequenceHeaderObu), + false)); +} + +TEST(Av1Config, GetAv1ConfigFromAnnexBObu) { + // Test parsing of a Sequence Header OBU with the reduced_still_picture_header + // unset-- aka a full Sequence Header OBU. + ASSERT_TRUE(VerifyAv1c(kAnnexBFullSequenceHeaderObu, + sizeof(kAnnexBFullSequenceHeaderObu), true)); + + // Test parsing of a reduced still image Sequence Header OBU. + ASSERT_TRUE(VerifyAv1c(kAnnexBReducedStillImageSequenceHeaderObu, + sizeof(kAnnexBReducedStillImageSequenceHeaderObu), + true)); +} + +TEST(Av1Config, ReadWriteConfig) { + Av1Config av1_config; + memset(&av1_config, 0, sizeof(av1_config)); + + // Test writing out the AV1 config. 
+ size_t bytes_written = 0; + uint8_t av1c_buffer[4] = { 0 }; + ASSERT_EQ(0, write_av1config(&av1_config, sizeof(av1c_buffer), &bytes_written, + &av1c_buffer[0])); + ASSERT_EQ(kAv1cNoConfigObusSize, bytes_written); + for (size_t i = 0; i < kAv1cNoConfigObusSize; ++i) { + ASSERT_EQ(kAv1cAllZero[i], av1c_buffer[i]) + << "Mismatch in output Av1Config at offset=" << i; + } + + // Test reading the AV1 config. + size_t bytes_read = 0; + ASSERT_EQ(0, read_av1config(&kAv1cAllZero[0], sizeof(kAv1cAllZero), + &bytes_read, &av1_config)); + ASSERT_EQ(kAv1cNoConfigObusSize, bytes_read); + ASSERT_EQ(0, write_av1config(&av1_config, sizeof(av1c_buffer), &bytes_written, + &av1c_buffer[0])); + for (size_t i = 0; i < kAv1cNoConfigObusSize; ++i) { + ASSERT_EQ(kAv1cAllZero[i], av1c_buffer[i]) + << "Mismatch in output Av1Config at offset=" << i; + } +} + +} // namespace diff --git a/third_party/aom/test/av1_convolve_scale_test.cc b/third_party/aom/test/av1_convolve_scale_test.cc new file mode 100644 index 0000000000..76cf77ab07 --- /dev/null +++ b/third_party/aom/test/av1_convolve_scale_test.cc @@ -0,0 +1,561 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <tuple> +#include <vector> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/av1_rtcd.h" + +#include "aom_ports/aom_timer.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +#include "av1/common/common_data.h" + +namespace { +const int kTestIters = 10; +const int kPerfIters = 1000; + +const int kVPad = 32; +const int kHPad = 32; +const int kXStepQn = 16; +const int kYStepQn = 20; + +using libaom_test::ACMRandom; +using std::make_tuple; +using std::tuple; + +enum NTaps { EIGHT_TAP, TEN_TAP, TWELVE_TAP }; +int NTapsToInt(NTaps ntaps) { return 8 + static_cast<int>(ntaps) * 2; } + +// A 16-bit filter with a configurable number of taps. +class TestFilter { + public: + void set(NTaps ntaps, bool backwards); + + InterpFilterParams params_; + + private: + std::vector<int16_t> coeffs_; +}; + +void TestFilter::set(NTaps ntaps, bool backwards) { + const int n = NTapsToInt(ntaps); + assert(n >= 8 && n <= 12); + + // The filter has n * SUBPEL_SHIFTS proper elements and an extra 8 bogus + // elements at the end so that convolutions can read off the end safely. + coeffs_.resize(n * SUBPEL_SHIFTS + 8); + + // The coefficients are pretty much arbitrary, but convolutions shouldn't + // over or underflow. For the first filter (subpels = 0), we use an + // increasing or decreasing ramp (depending on the backwards parameter). We + // don't want any zero coefficients, so we make it have an x-intercept at -1 + // or n. To ensure absence of under/overflow, we normalise the area under the + // ramp to be I = 1 << FILTER_BITS (so that convolving a constant function + // gives the identity). + // + // When increasing, the function has the form: + // + // f(x) = A * (x + 1) + // + // Summing and rearranging for A gives A = 2 * I / (n * (n + 1)). 
If the + // filter is reversed, we have the same A but with formula + // + // g(x) = A * (n - x) + const int I = 1 << FILTER_BITS; + const float A = 2.f * I / (n * (n + 1.f)); + for (int i = 0; i < n; ++i) { + coeffs_[i] = static_cast<int16_t>(A * (backwards ? (n - i) : (i + 1))); + } + + // For the other filters, make them slightly different by swapping two + // columns. Filter k will have the columns (k % n) and (7 * k) % n swapped. + const size_t filter_size = sizeof(coeffs_[0] * n); + int16_t *const filter0 = &coeffs_[0]; + for (int k = 1; k < SUBPEL_SHIFTS; ++k) { + int16_t *filterk = &coeffs_[k * n]; + memcpy(filterk, filter0, filter_size); + + const int idx0 = k % n; + const int idx1 = (7 * k) % n; + + const int16_t tmp = filterk[idx0]; + filterk[idx0] = filterk[idx1]; + filterk[idx1] = tmp; + } + + // Finally, write some rubbish at the end to make sure we don't use it. + for (int i = 0; i < 8; ++i) coeffs_[n * SUBPEL_SHIFTS + i] = 123 + i; + + // Fill in params + params_.filter_ptr = &coeffs_[0]; + params_.taps = n; + // These are ignored by the functions being tested. Set them to whatever. + params_.interp_filter = EIGHTTAP_REGULAR; +} + +template <typename SrcPixel> +class TestImage { + public: + TestImage(int w, int h, int bd) : w_(w), h_(h), bd_(bd) { + assert(bd < 16); + assert(bd <= 8 * static_cast<int>(sizeof(SrcPixel))); + + // Pad width by 2*kHPad and then round up to the next multiple of 16 + // to get src_stride_. 
Add another 16 for dst_stride_ (to make sure + // something goes wrong if we use the wrong one) + src_stride_ = (w_ + 2 * kHPad + 15) & ~15; + dst_stride_ = src_stride_ + 16; + + // Allocate image data + src_data_.resize(2 * src_block_size()); + dst_data_.resize(2 * dst_block_size()); + dst_16_data_.resize(2 * dst_block_size()); + } + + void Initialize(ACMRandom *rnd); + void Check() const; + + int src_stride() const { return src_stride_; } + int dst_stride() const { return dst_stride_; } + + int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); } + int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); } + + const SrcPixel *GetSrcData(bool ref, bool borders) const { + const SrcPixel *block = &src_data_[ref ? 0 : src_block_size()]; + return borders ? block : block + kHPad + src_stride_ * kVPad; + } + + SrcPixel *GetDstData(bool ref, bool borders) { + SrcPixel *block = &dst_data_[ref ? 0 : dst_block_size()]; + return borders ? block : block + kHPad + dst_stride_ * kVPad; + } + + CONV_BUF_TYPE *GetDst16Data(bool ref, bool borders) { + CONV_BUF_TYPE *block = &dst_16_data_[ref ? 0 : dst_block_size()]; + return borders ? 
block : block + kHPad + dst_stride_ * kVPad; + } + + private: + int w_, h_, bd_; + int src_stride_, dst_stride_; + + std::vector<SrcPixel> src_data_; + std::vector<SrcPixel> dst_data_; + std::vector<CONV_BUF_TYPE> dst_16_data_; +}; + +template <typename Pixel> +void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) { + if (!trash) { + memset(data, 0, sizeof(*data) * num_pixels); + return; + } + const Pixel mask = (1 << bd) - 1; + for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask; +} + +template <typename Pixel> +void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd, + bool trash_edges, Pixel *data) { + assert(rnd); + const Pixel mask = (1 << bd) - 1; + + // Fill in the first buffer with random data + // Top border + FillEdge(rnd, stride * kVPad, bd, trash_edges, data); + for (int r = 0; r < h; ++r) { + Pixel *row_data = data + (kVPad + r) * stride; + // Left border, contents, right border + FillEdge(rnd, kHPad, bd, trash_edges, row_data); + for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask; + FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w); + } + // Bottom border + FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h)); + + const int bpp = sizeof(*data); + const int block_elts = stride * (h + 2 * kVPad); + const int block_size = bpp * block_elts; + + // Now copy that to the second buffer + memcpy(data + block_elts, data, block_size); +} + +template <typename SrcPixel> +void TestImage<SrcPixel>::Initialize(ACMRandom *rnd) { + PrepBuffers(rnd, w_, h_, src_stride_, bd_, false, &src_data_[0]); + PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_data_[0]); + PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_16_data_[0]); +} + +template <typename SrcPixel> +void TestImage<SrcPixel>::Check() const { + // If memcmp returns 0, there's nothing to do. 
+ const int num_pixels = dst_block_size(); + const SrcPixel *ref_dst = &dst_data_[0]; + const SrcPixel *tst_dst = &dst_data_[num_pixels]; + + const CONV_BUF_TYPE *ref_16_dst = &dst_16_data_[0]; + const CONV_BUF_TYPE *tst_16_dst = &dst_16_data_[num_pixels]; + + if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) { + if (0 == memcmp(ref_16_dst, tst_16_dst, sizeof(*ref_16_dst) * num_pixels)) + return; + } + // Otherwise, iterate through the buffer looking for differences (including + // the edges) + const int stride = dst_stride_; + for (int r = 0; r < h_ + 2 * kVPad; ++r) { + for (int c = 0; c < w_ + 2 * kHPad; ++c) { + const int32_t ref_value = ref_dst[r * stride + c]; + const int32_t tst_value = tst_dst[r * stride + c]; + + EXPECT_EQ(tst_value, ref_value) + << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad); + } + } + + for (int r = 0; r < h_ + 2 * kVPad; ++r) { + for (int c = 0; c < w_ + 2 * kHPad; ++c) { + const int32_t ref_value = ref_16_dst[r * stride + c]; + const int32_t tst_value = tst_16_dst[r * stride + c]; + + EXPECT_EQ(tst_value, ref_value) + << "Error in 16 bit buffer " + << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad); + } + } +} + +typedef tuple<int, int> BlockDimension; + +struct BaseParams { + BaseParams(BlockDimension dimensions, NTaps num_taps_x, NTaps num_taps_y, + bool average) + : dims(dimensions), ntaps_x(num_taps_x), ntaps_y(num_taps_y), + avg(average) {} + + BlockDimension dims; + NTaps ntaps_x, ntaps_y; + bool avg; +}; + +template <typename SrcPixel> +class ConvolveScaleTestBase : public ::testing::Test { + public: + ConvolveScaleTestBase() : image_(nullptr) {} + ~ConvolveScaleTestBase() override { delete image_; } + + // Implemented by subclasses (SetUp depends on the parameters passed + // in and RunOne depends on the function to be tested. 
These can't + // be templated for low/high bit depths because they have different + // numbers of parameters) + void SetUp() override = 0; + virtual void RunOne(bool ref) = 0; + + protected: + void SetParams(const BaseParams ¶ms, int bd) { + width_ = std::get<0>(params.dims); + height_ = std::get<1>(params.dims); + ntaps_x_ = params.ntaps_x; + ntaps_y_ = params.ntaps_y; + bd_ = bd; + avg_ = params.avg; + + filter_x_.set(ntaps_x_, false); + filter_y_.set(ntaps_y_, true); + convolve_params_ = + get_conv_params_no_round(avg_ != false, 0, nullptr, 0, 1, bd); + + delete image_; + image_ = new TestImage<SrcPixel>(width_, height_, bd_); + ASSERT_NE(image_, nullptr); + } + + void SetConvParamOffset(int i, int j, int is_compound, int do_average, + int use_dist_wtd_comp_avg) { + if (i == -1 && j == -1) { + convolve_params_.use_dist_wtd_comp_avg = use_dist_wtd_comp_avg; + convolve_params_.is_compound = is_compound; + convolve_params_.do_average = do_average; + } else { + convolve_params_.use_dist_wtd_comp_avg = use_dist_wtd_comp_avg; + convolve_params_.fwd_offset = quant_dist_lookup_table[j][i]; + convolve_params_.bck_offset = quant_dist_lookup_table[j][1 - i]; + convolve_params_.is_compound = is_compound; + convolve_params_.do_average = do_average; + } + } + + void Run() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int i = 0; i < kTestIters; ++i) { + int is_compound = 0; + SetConvParamOffset(-1, -1, is_compound, 0, 0); + Prep(&rnd); + RunOne(true); + RunOne(false); + image_->Check(); + + is_compound = 1; + for (int do_average = 0; do_average < 2; do_average++) { + for (int use_dist_wtd_comp_avg = 0; use_dist_wtd_comp_avg < 2; + use_dist_wtd_comp_avg++) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 4; ++k) { + SetConvParamOffset(j, k, is_compound, do_average, + use_dist_wtd_comp_avg); + Prep(&rnd); + RunOne(true); + RunOne(false); + image_->Check(); + } + } + } + } + } + } + + void SpeedTest() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + 
Prep(&rnd); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (int i = 0; i < kPerfIters; ++i) RunOne(true); + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (int i = 0; i < kPerfIters; ++i) RunOne(false); + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: CDEFSpeedTest, SIMD slower than C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; + } + + static int RandomSubpel(ACMRandom *rnd) { + const uint8_t subpel_mode = rnd->Rand8(); + if ((subpel_mode & 7) == 0) { + return 0; + } else if ((subpel_mode & 7) == 1) { + return SCALE_SUBPEL_SHIFTS - 1; + } else { + return 1 + rnd->PseudoUniform(SCALE_SUBPEL_SHIFTS - 2); + } + } + + void Prep(ACMRandom *rnd) { + assert(rnd); + + // Choose subpel_x_ and subpel_y_. 
They should be less than + // SCALE_SUBPEL_SHIFTS; we also want to add extra weight to "interesting" + // values: 0 and SCALE_SUBPEL_SHIFTS - 1 + subpel_x_ = RandomSubpel(rnd); + subpel_y_ = RandomSubpel(rnd); + + image_->Initialize(rnd); + } + + int width_, height_, bd_; + NTaps ntaps_x_, ntaps_y_; + bool avg_; + int subpel_x_, subpel_y_; + TestFilter filter_x_, filter_y_; + TestImage<SrcPixel> *image_; + ConvolveParams convolve_params_; +}; + +typedef tuple<int, int> BlockDimension; + +typedef void (*LowbdConvolveFunc)(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int w, int h, + const InterpFilterParams *filter_params_x, + const InterpFilterParams *filter_params_y, + const int subpel_x_qn, const int x_step_qn, + const int subpel_y_qn, const int y_step_qn, + ConvolveParams *conv_params); + +// Test parameter list: +// <tst_fun, dims, ntaps_x, ntaps_y, avg> +typedef tuple<LowbdConvolveFunc, BlockDimension, NTaps, NTaps, bool> + LowBDParams; + +class LowBDConvolveScaleTest + : public ConvolveScaleTestBase<uint8_t>, + public ::testing::WithParamInterface<LowBDParams> { + public: + ~LowBDConvolveScaleTest() override = default; + + void SetUp() override { + tst_fun_ = GET_PARAM(0); + + const BlockDimension &block = GET_PARAM(1); + const NTaps ntaps_x = GET_PARAM(2); + const NTaps ntaps_y = GET_PARAM(3); + const int bd = 8; + const bool avg = GET_PARAM(4); + + SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd); + } + + void RunOne(bool ref) override { + const uint8_t *src = image_->GetSrcData(ref, false); + uint8_t *dst = image_->GetDstData(ref, false); + convolve_params_.dst = image_->GetDst16Data(ref, false); + const int src_stride = image_->src_stride(); + const int dst_stride = image_->dst_stride(); + if (ref) { + av1_convolve_2d_scale_c(src, src_stride, dst, dst_stride, width_, height_, + &filter_x_.params_, &filter_y_.params_, subpel_x_, + kXStepQn, subpel_y_, kYStepQn, &convolve_params_); + } else { + tst_fun_(src, src_stride, dst, 
dst_stride, width_, height_, + &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn, + subpel_y_, kYStepQn, &convolve_params_); + } + } + + private: + LowbdConvolveFunc tst_fun_; +}; + +const BlockDimension kBlockDim[] = { + make_tuple(2, 2), make_tuple(2, 4), make_tuple(4, 4), + make_tuple(4, 8), make_tuple(8, 4), make_tuple(8, 8), + make_tuple(8, 16), make_tuple(16, 8), make_tuple(16, 16), + make_tuple(16, 32), make_tuple(32, 16), make_tuple(32, 32), + make_tuple(32, 64), make_tuple(64, 32), make_tuple(64, 64), + make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128), +}; + +const NTaps kNTaps[] = { EIGHT_TAP }; + +TEST_P(LowBDConvolveScaleTest, Check) { Run(); } +TEST_P(LowBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P( + C, LowBDConvolveScaleTest, + ::testing::Combine(::testing::Values(av1_convolve_2d_scale_c), + ::testing::ValuesIn(kBlockDim), + ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps), + ::testing::Bool())); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, LowBDConvolveScaleTest, + ::testing::Combine(::testing::Values(av1_convolve_2d_scale_sse4_1), + ::testing::ValuesIn(kBlockDim), + ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps), + ::testing::Bool())); +#endif // HAVE_SSE4_1 + +#if CONFIG_AV1_HIGHBITDEPTH +typedef void (*HighbdConvolveFunc)(const uint16_t *src, int src_stride, + uint16_t *dst, int dst_stride, int w, int h, + const InterpFilterParams *filter_params_x, + const InterpFilterParams *filter_params_y, + const int subpel_x_qn, const int x_step_qn, + const int subpel_y_qn, const int y_step_qn, + ConvolveParams *conv_params, int bd); + +// Test parameter list: +// <tst_fun, dims, ntaps_x, ntaps_y, avg, bd> +typedef tuple<HighbdConvolveFunc, BlockDimension, NTaps, NTaps, bool, int> + HighBDParams; + +class HighBDConvolveScaleTest + : public ConvolveScaleTestBase<uint16_t>, + public ::testing::WithParamInterface<HighBDParams> { + public: + ~HighBDConvolveScaleTest() 
override = default; + + void SetUp() override { + tst_fun_ = GET_PARAM(0); + + const BlockDimension &block = GET_PARAM(1); + const NTaps ntaps_x = GET_PARAM(2); + const NTaps ntaps_y = GET_PARAM(3); + const bool avg = GET_PARAM(4); + const int bd = GET_PARAM(5); + + SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd); + } + + void RunOne(bool ref) override { + const uint16_t *src = image_->GetSrcData(ref, false); + uint16_t *dst = image_->GetDstData(ref, false); + convolve_params_.dst = image_->GetDst16Data(ref, false); + const int src_stride = image_->src_stride(); + const int dst_stride = image_->dst_stride(); + + if (ref) { + av1_highbd_convolve_2d_scale_c( + src, src_stride, dst, dst_stride, width_, height_, &filter_x_.params_, + &filter_y_.params_, subpel_x_, kXStepQn, subpel_y_, kYStepQn, + &convolve_params_, bd_); + } else { + tst_fun_(src, src_stride, dst, dst_stride, width_, height_, + &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn, + subpel_y_, kYStepQn, &convolve_params_, bd_); + } + } + + private: + HighbdConvolveFunc tst_fun_; +}; + +const int kBDs[] = { 8, 10, 12 }; + +TEST_P(HighBDConvolveScaleTest, Check) { Run(); } +TEST_P(HighBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P( + C, HighBDConvolveScaleTest, + ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_c), + ::testing::ValuesIn(kBlockDim), + ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps), + ::testing::Bool(), ::testing::ValuesIn(kBDs))); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, HighBDConvolveScaleTest, + ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_sse4_1), + ::testing::ValuesIn(kBlockDim), + ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps), + ::testing::Bool(), ::testing::ValuesIn(kBDs))); +#endif // HAVE_SSE4_1 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, HighBDConvolveScaleTest, + ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_neon), + 
::testing::ValuesIn(kBlockDim), + ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps), + ::testing::Bool(), ::testing::ValuesIn(kBDs))); + +#endif // HAVE_NEON + +#endif // CONFIG_AV1_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/av1_convolve_test.cc b/third_party/aom/test/av1_convolve_test.cc new file mode 100644 index 0000000000..5bbac21803 --- /dev/null +++ b/third_party/aom/test/av1_convolve_test.cc @@ -0,0 +1,2447 @@ +/* + * Copyright (c) 2020, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <ostream> +#include <set> +#include <vector> +#include "config/av1_rtcd.h" +#include "config/aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "aom_ports/aom_timer.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +// TODO(any): Remove following INTERP_FILTERS_ALL define, so that 12-tap filter +// is tested once 12-tap filter SIMD is done. +#undef INTERP_FILTERS_ALL +#define INTERP_FILTERS_ALL 4 + +// All single reference convolve tests are parameterized on block size, +// bit-depth, and function to test. +// +// Note that parameterizing on these variables (and not other parameters) is +// a conscious decision - Jenkins needs some degree of parallelization to run +// the tests within the time limit, but if the number of parameters increases +// too much, the gtest framework does not handle it well (increased overhead per +// test, huge amount of output to stdout, etc.). 
+// +// Also note that the test suites must be named with the architecture, e.g., +// C, C_X, AVX2_X, ... The test suite that runs on Jenkins sometimes runs tests +// that cannot deal with intrinsics (e.g., the Valgrind tests on 32-bit x86 +// binaries) and will disable tests using a filter like +// --gtest_filter=-:SSE4_1.*. If the test suites are not named this way, the +// testing infrastructure will not selectively filter them properly. +class BlockSize { + public: + BlockSize(int w, int h) : width_(w), height_(h) {} + + int Width() const { return width_; } + int Height() const { return height_; } + + bool operator<(const BlockSize &other) const { + if (Width() == other.Width()) { + return Height() < other.Height(); + } + return Width() < other.Width(); + } + + bool operator==(const BlockSize &other) const { + return Width() == other.Width() && Height() == other.Height(); + } + + private: + int width_; + int height_; +}; + +// Block size / bit depth / test function used to parameterize the tests. +template <typename T> +class TestParam { + public: + TestParam(const BlockSize &block, int bd, T test_func) + : block_(block), bd_(bd), test_func_(test_func) {} + + const BlockSize &Block() const { return block_; } + int BitDepth() const { return bd_; } + T TestFunction() const { return test_func_; } + + bool operator==(const TestParam &other) const { + return Block() == other.Block() && BitDepth() == other.BitDepth() && + TestFunction() == other.TestFunction(); + } + + private: + BlockSize block_; + int bd_; + T test_func_; +}; + +template <typename T> +std::ostream &operator<<(std::ostream &os, const TestParam<T> &test_arg) { + return os << "TestParam { width:" << test_arg.Block().Width() + << " height:" << test_arg.Block().Height() + << " bd:" << test_arg.BitDepth() << " }"; +} + +// Generate the list of all block widths / heights that need to be tested, +// includes chroma and luma sizes, for the given bit-depths. 
The test +// function is the same for all generated parameters. +template <typename T> +std::vector<TestParam<T>> GetTestParams(std::initializer_list<int> bit_depths, + T test_func) { + std::set<BlockSize> sizes; + for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) { + const int w = block_size_wide[b]; + const int h = block_size_high[b]; + sizes.insert(BlockSize(w, h)); + // Add in smaller chroma sizes as well. + if (w == 4 || h == 4) { + sizes.insert(BlockSize(w / 2, h / 2)); + } + } + std::vector<TestParam<T>> result; + for (const BlockSize &block : sizes) { + for (int bd : bit_depths) { + result.push_back(TestParam<T>(block, bd, test_func)); + } + } + return result; +} + +template <typename T> +std::vector<TestParam<T>> GetLowbdTestParams(T test_func) { + return GetTestParams({ 8 }, test_func); +} + +template <typename T> +::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdParams( + T test_func) { + return ::testing::ValuesIn(GetLowbdTestParams(test_func)); +} + +// Test the test-parameters generators work as expected. +class AV1ConvolveParametersTest : public ::testing::Test {}; + +TEST_F(AV1ConvolveParametersTest, GetLowbdTestParams) { + auto v = GetLowbdTestParams(av1_convolve_x_sr_c); + ASSERT_EQ(27U, v.size()); + for (const auto &p : v) { + ASSERT_EQ(8, p.BitDepth()); + // Needed (instead of ASSERT_EQ(...) since gtest does not + // have built in printing for arbitrary functions, which + // causes a compilation error. 
+ bool same_fn = av1_convolve_x_sr_c == p.TestFunction(); + ASSERT_TRUE(same_fn); + } +} + +#if CONFIG_AV1_HIGHBITDEPTH +template <typename T> +std::vector<TestParam<T>> GetHighbdTestParams(T test_func) { + return GetTestParams({ 10, 12 }, test_func); +} + +template <typename T> +::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdParams( + T test_func) { + return ::testing::ValuesIn(GetHighbdTestParams(test_func)); +} + +TEST_F(AV1ConvolveParametersTest, GetHighbdTestParams) { + auto v = GetHighbdTestParams(av1_highbd_convolve_x_sr_c); + ASSERT_EQ(54U, v.size()); + int num_10 = 0; + int num_12 = 0; + for (const auto &p : v) { + ASSERT_TRUE(p.BitDepth() == 10 || p.BitDepth() == 12); + bool same_fn = av1_highbd_convolve_x_sr_c == p.TestFunction(); + ASSERT_TRUE(same_fn); + if (p.BitDepth() == 10) { + ++num_10; + } else { + ++num_12; + } + } + ASSERT_EQ(num_10, num_12); +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +// AV1ConvolveTest is the base class that all convolve tests should derive from. +// It provides storage/methods for generating randomized buffers for both +// low bit-depth and high bit-depth, and setup/teardown methods for clearing +// system state. Implementors can get the bit-depth / block-size / +// test function by calling GetParam(). +template <typename T> +class AV1ConvolveTest : public ::testing::TestWithParam<TestParam<T>> { + public: + ~AV1ConvolveTest() override = default; + + void SetUp() override { + rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); + } + + // Randomizes the 8-bit input buffer and returns a pointer to it. Note that + // the pointer is safe to use with an 8-tap filter. The stride can range + // from width to (width + kPadding). Also note that the pointer is to the + // same memory location. + static constexpr int kInputPadding = 12; + + // Get a pointer to a buffer with stride == width. 
Note that we must have + // the test param passed in explicitly -- the gtest framework does not + // support calling GetParam() within a templatized class. + // Note that FirstRandomInput8 always returns the same pointer -- if two + // inputs are needed, also use SecondRandomInput8. + const uint8_t *FirstRandomInput8(const TestParam<T> ¶m) { + // Note we can't call GetParam() directly -- gtest does not support + // this for parameterized types. + return RandomInput8(input8_1_, param); + } + + const uint8_t *SecondRandomInput8(const TestParam<T> ¶m) { + return RandomInput8(input8_2_, param); + } + + // Some of the intrinsics perform writes in 32 byte chunks. Moreover, some + // of the instrinsics assume that the stride is also a multiple of 32. + // To satisfy these constraints and also remain simple, output buffer strides + // are assumed MAX_SB_SIZE. + static constexpr int kOutputStride = MAX_SB_SIZE; + + // Check that two 8-bit output buffers are identical. + void AssertOutputBufferEq(const uint8_t *p1, const uint8_t *p2, int width, + int height) { + ASSERT_TRUE(p1 != p2) << "Buffers must be at different memory locations"; + for (int j = 0; j < height; ++j) { + if (memcmp(p1, p2, sizeof(*p1) * width) == 0) { + p1 += kOutputStride; + p2 += kOutputStride; + continue; + } + for (int i = 0; i < width; ++i) { + ASSERT_EQ(p1[i], p2[i]) + << width << "x" << height << " Pixel mismatch at (" << i << ", " + << j << ")"; + } + } + } + + // Check that two 16-bit output buffers are identical. 
+ void AssertOutputBufferEq(const uint16_t *p1, const uint16_t *p2, int width, + int height) { + ASSERT_TRUE(p1 != p2) << "Buffers must be in different memory locations"; + for (int j = 0; j < height; ++j) { + if (memcmp(p1, p2, sizeof(*p1) * width) == 0) { + p1 += kOutputStride; + p2 += kOutputStride; + continue; + } + for (int i = 0; i < width; ++i) { + ASSERT_EQ(p1[i], p2[i]) + << width << "x" << height << " Pixel mismatch at (" << i << ", " + << j << ")"; + } + } + } + +#if CONFIG_AV1_HIGHBITDEPTH + // Note that the randomized values are capped by bit-depth. + const uint16_t *FirstRandomInput16(const TestParam<T> ¶m) { + return RandomInput16(input16_1_, param); + } + + const uint16_t *SecondRandomInput16(const TestParam<T> ¶m) { + return RandomInput16(input16_2_, param); + } +#endif + + private: + const uint8_t *RandomInput8(uint8_t *p, const TestParam<T> ¶m) { + EXPECT_EQ(8, param.BitDepth()); + EXPECT_GE(MAX_SB_SIZE, param.Block().Width()); + EXPECT_GE(MAX_SB_SIZE, param.Block().Height()); + const int padded_width = param.Block().Width() + kInputPadding; + const int padded_height = param.Block().Height() + kInputPadding; + Randomize(p, padded_width * padded_height); + return p + (kInputPadding / 2) * padded_width + kInputPadding / 2; + } + + void Randomize(uint8_t *p, int size) { + for (int i = 0; i < size; ++i) { + p[i] = rnd_.Rand8(); + } + } + +#if CONFIG_AV1_HIGHBITDEPTH + const uint16_t *RandomInput16(uint16_t *p, const TestParam<T> ¶m) { + // Check that this is only called with high bit-depths. 
+ EXPECT_TRUE(param.BitDepth() == 10 || param.BitDepth() == 12); + EXPECT_GE(MAX_SB_SIZE, param.Block().Width()); + EXPECT_GE(MAX_SB_SIZE, param.Block().Height()); + const int padded_width = param.Block().Width() + kInputPadding; + const int padded_height = param.Block().Height() + kInputPadding; + Randomize(p, padded_width * padded_height, param.BitDepth()); + return p + (kInputPadding / 2) * padded_width + kInputPadding / 2; + } + + void Randomize(uint16_t *p, int size, int bit_depth) { + for (int i = 0; i < size; ++i) { + p[i] = rnd_.Rand16() & ((1 << bit_depth) - 1); + } + } +#endif + + static constexpr int kInputStride = MAX_SB_SIZE + kInputPadding; + + libaom_test::ACMRandom rnd_; + // Statically allocate all the memory that is needed for the tests. Note + // that we cannot allocate output memory here. It must use DECLARE_ALIGNED, + // which is a C99 feature and interacts badly with C++ member variables. + uint8_t input8_1_[kInputStride * kInputStride]; + uint8_t input8_2_[kInputStride * kInputStride]; +#if CONFIG_AV1_HIGHBITDEPTH + uint16_t input16_1_[kInputStride * kInputStride]; + uint16_t input16_2_[kInputStride * kInputStride]; +#endif +}; + +//////////////////////////////////////////////////////// +// Single reference convolve-x functions (low bit-depth) +//////////////////////////////////////////////////////// +typedef void (*convolve_x_func)(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int w, int h, + const InterpFilterParams *filter_params_x, + const int subpel_x_qn, + ConvolveParams *conv_params); + +class AV1ConvolveXTest : public AV1ConvolveTest<convolve_x_func> { + public: + void RunTest() { + for (int sub_x = 0; sub_x < 16; ++sub_x) { + for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; + ++filter) { + InterpFilter f = static_cast<InterpFilter>(filter); + TestConvolve(sub_x, f); + } + } + } + + public: + void SpeedTest() { + for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; + ++filter) { + 
InterpFilter f = static_cast<InterpFilter>(filter); + TestConvolveSpeed(f, 10000); + } + } + + private: + void TestConvolve(const int sub_x, const InterpFilter filter) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + + const InterpFilterParams *filter_params_x = + av1_get_interp_filter_params_with_block_size(filter, width); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + const uint8_t *input = FirstRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height, + filter_params_x, sub_x, &conv_params1); + + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + convolve_x_func test_func = GetParam().TestFunction(); + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + test_func(input, width, test, kOutputStride, width, height, filter_params_x, + sub_x, &conv_params2); + AssertOutputBufferEq(reference, test, width, height); + } + + private: + void TestConvolveSpeed(const InterpFilter filter, const int num_iters) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + + const InterpFilterParams *filter_params_x = + av1_get_interp_filter_params_with_block_size(filter, width); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + const uint8_t *input = FirstRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height, + filter_params_x, 0, &conv_params1); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + convolve_x_func test_func = 
GetParam().TestFunction(); + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + test_func(input, width, test, kOutputStride, width, height, + filter_params_x, 0, &conv_params2); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, + time2, time1 / time2); + } +}; + +TEST_P(AV1ConvolveXTest, RunTest) { RunTest(); } + +TEST_P(AV1ConvolveXTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXTest, + BuildLowbdParams(av1_convolve_x_sr_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXTest, + BuildLowbdParams(av1_convolve_x_sr_sse2)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXTest, + BuildLowbdParams(av1_convolve_x_sr_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXTest, + BuildLowbdParams(av1_convolve_x_sr_neon)); +#endif + +#if HAVE_NEON_DOTPROD +INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveXTest, + BuildLowbdParams(av1_convolve_x_sr_neon_dotprod)); +#endif + +#if HAVE_NEON_I8MM +INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveXTest, + BuildLowbdParams(av1_convolve_x_sr_neon_i8mm)); +#endif + +//////////////////////////////////////////////////////////////// +// Single reference convolve-x IntraBC functions (low bit-depth) +//////////////////////////////////////////////////////////////// + +class AV1ConvolveXIntraBCTest : public AV1ConvolveTest<convolve_x_func> { + public: + void RunTest() { + // IntraBC functions only operate for subpel_x_qn = 8. 
+ constexpr int kSubX = 8; + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; + const uint8_t *input = FirstRandomInput8(GetParam()); + + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + // Use a stride different from width to avoid potential storing errors that + // would go undetected. The input buffer is filled using a padding of 12, so + // the stride can be anywhere between width and width + 12. + av1_convolve_x_sr_intrabc_c(input, width + 2, reference, kOutputStride, + width, height, filter_params_x, kSubX, + &conv_params1); + + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + convolve_x_func test_func = GetParam().TestFunction(); + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + test_func(input, width + 2, test, kOutputStride, width, height, + filter_params_x, kSubX, &conv_params2); + + AssertOutputBufferEq(reference, test, width, height); + } + + void SpeedTest() { + constexpr int kNumIters = 10000; + const InterpFilter filter = static_cast<InterpFilter>(BILINEAR); + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; + const uint8_t *input = FirstRandomInput8(GetParam()); + + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < kNumIters; ++i) { + av1_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride, width, + height, filter_params_x, 0, &conv_params1); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + ConvolveParams conv_params2 = + 
get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + convolve_x_func test_func = GetParam().TestFunction(); + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + aom_usec_timer_start(&timer); + for (int i = 0; i < kNumIters; ++i) { + test_func(input, width, test, kOutputStride, width, height, + filter_params_x, 0, &conv_params2); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, + time2, time1 / time2); + } +}; + +TEST_P(AV1ConvolveXIntraBCTest, RunTest) { RunTest(); } + +TEST_P(AV1ConvolveXIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXIntraBCTest, + BuildLowbdParams(av1_convolve_x_sr_intrabc_c)); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXIntraBCTest, + BuildLowbdParams(av1_convolve_x_sr_intrabc_neon)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +///////////////////////////////////////////////////////// +// Single reference convolve-x functions (high bit-depth) +///////////////////////////////////////////////////////// +typedef void (*highbd_convolve_x_func)( + const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, + int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn, + ConvolveParams *conv_params, int bd); + +class AV1ConvolveXHighbdTest : public AV1ConvolveTest<highbd_convolve_x_func> { + public: + void RunTest() { + for (int sub_x = 0; sub_x < 16; ++sub_x) { + for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; + ++filter) { + InterpFilter f = static_cast<InterpFilter>(filter); + TestConvolve(sub_x, f); + } + } + } + + public: + void SpeedTest() { + for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; + ++filter) { + InterpFilter f = static_cast<InterpFilter>(filter); + TestConvolveSpeed(f, 10000); + } + } + + private: + void TestConvolve(const int sub_x, const InterpFilter filter) { + const int 
width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_x = + av1_get_interp_filter_params_with_block_size(filter, width); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); + const uint16_t *input = FirstRandomInput16(GetParam()); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width, + height, filter_params_x, sub_x, &conv_params1, + bit_depth); + + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + GetParam().TestFunction()(input, width, test, kOutputStride, width, height, + filter_params_x, sub_x, &conv_params2, bit_depth); + AssertOutputBufferEq(reference, test, width, height); + } + + private: + void TestConvolveSpeed(const InterpFilter filter, const int num_iters) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_x = + av1_get_interp_filter_params_with_block_size(filter, width); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + const uint16_t *input = FirstRandomInput16(GetParam()); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width, + height, filter_params_x, 0, &conv_params1, + bit_depth); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + highbd_convolve_x_func test_func = GetParam().TestFunction(); + DECLARE_ALIGNED(32, uint16_t, 
test[MAX_SB_SQUARE]); + + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + test_func(input, width, test, kOutputStride, width, height, + filter_params_x, 0, &conv_params2, bit_depth); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, + time2, time1 / time2); + } +}; + +TEST_P(AV1ConvolveXHighbdTest, RunTest) { RunTest(); } + +TEST_P(AV1ConvolveXHighbdTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdTest, + BuildHighbdParams(av1_highbd_convolve_x_sr_c)); + +#if HAVE_SSSE3 +INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveXHighbdTest, + BuildHighbdParams(av1_highbd_convolve_x_sr_ssse3)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXHighbdTest, + BuildHighbdParams(av1_highbd_convolve_x_sr_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXHighbdTest, + BuildHighbdParams(av1_highbd_convolve_x_sr_neon)); +#endif + +///////////////////////////////////////////////////////////////// +// Single reference convolve-x IntraBC functions (high bit-depth) +///////////////////////////////////////////////////////////////// + +class AV1ConvolveXHighbdIntraBCTest + : public AV1ConvolveTest<highbd_convolve_x_func> { + public: + void RunTest() { + // IntraBC functions only operate for subpel_x_qn = 8. 
+ constexpr int kSubX = 8; + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; + const uint16_t *input = FirstRandomInput16(GetParam()); + + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + // Use a stride different from width to avoid potential storing errors that + // would go undetected. The input buffer is filled using a padding of 12, so + // the stride can be anywhere between width and width + 12. + av1_highbd_convolve_x_sr_intrabc_c( + input, width + 2, reference, kOutputStride, width, height, + filter_params_x, kSubX, &conv_params1, bit_depth); + + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + GetParam().TestFunction()(input, width + 2, test, kOutputStride, width, + height, filter_params_x, kSubX, &conv_params2, + bit_depth); + + AssertOutputBufferEq(reference, test, width, height); + } + + void SpeedTest() { + constexpr int kNumIters = 10000; + const InterpFilter filter = static_cast<InterpFilter>(BILINEAR); + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; + const uint16_t *input = FirstRandomInput16(GetParam()); + + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < kNumIters; ++i) { + av1_highbd_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride, + width, height, filter_params_x, 0, + &conv_params1, bit_depth); + } + aom_usec_timer_mark(&timer); + 
const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + highbd_convolve_x_func test_func = GetParam().TestFunction(); + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + aom_usec_timer_start(&timer); + for (int i = 0; i < kNumIters; ++i) { + test_func(input, width, test, kOutputStride, width, height, + filter_params_x, 0, &conv_params2, bit_depth); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, + time2, time1 / time2); + } +}; + +TEST_P(AV1ConvolveXHighbdIntraBCTest, RunTest) { RunTest(); } + +TEST_P(AV1ConvolveXHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdIntraBCTest, + BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_c)); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1ConvolveXHighbdIntraBCTest, + BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_neon)); +#endif + +#endif // CONFIG_AV1_HIGHBITDEPTH + +//////////////////////////////////////////////////////// +// Single reference convolve-y functions (low bit-depth) +//////////////////////////////////////////////////////// +typedef void (*convolve_y_func)(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int w, int h, + const InterpFilterParams *filter_params_y, + const int subpel_y_qn); + +class AV1ConvolveYTest : public AV1ConvolveTest<convolve_y_func> { + public: + void RunTest() { + for (int sub_y = 0; sub_y < 16; ++sub_y) { + for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; + ++filter) { + InterpFilter f = static_cast<InterpFilter>(filter); + TestConvolve(sub_y, f); + } + } + } + + public: + void SpeedTest() { + for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; + ++filter) { + InterpFilter f = static_cast<InterpFilter>(filter); + 
TestConvolveSpeed(f, 10000); + } + } + + private: + void TestConvolve(const int sub_y, const InterpFilter filter) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + + const InterpFilterParams *filter_params_y = + av1_get_interp_filter_params_with_block_size(filter, height); + const uint8_t *input = FirstRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height, + filter_params_y, sub_y); + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + GetParam().TestFunction()(input, width, test, kOutputStride, width, height, + filter_params_y, sub_y); + AssertOutputBufferEq(reference, test, width, height); + } + + private: + void TestConvolveSpeed(const InterpFilter filter, const int num_iters) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + + const InterpFilterParams *filter_params_y = + av1_get_interp_filter_params_with_block_size(filter, height); + const uint8_t *input = FirstRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height, + filter_params_y, 0); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + GetParam().TestFunction()(input, width, test, kOutputStride, width, + height, filter_params_y, 0); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, + time2, time1 / time2); + } +}; + +TEST_P(AV1ConvolveYTest, RunTest) { RunTest(); } + 
+TEST_P(AV1ConvolveYTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYTest, + BuildLowbdParams(av1_convolve_y_sr_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYTest, + BuildLowbdParams(av1_convolve_y_sr_sse2)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYTest, + BuildLowbdParams(av1_convolve_y_sr_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYTest, + BuildLowbdParams(av1_convolve_y_sr_neon)); +#endif + +//////////////////////////////////////////////////////////////// +// Single reference convolve-y IntraBC functions (low bit-depth) +//////////////////////////////////////////////////////////////// + +class AV1ConvolveYIntraBCTest : public AV1ConvolveTest<convolve_y_func> { + public: + void RunTest() { + // IntraBC functions only operate for subpel_y_qn = 8. + constexpr int kSubY = 8; + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params; + const uint8_t *input = FirstRandomInput8(GetParam()); + + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + // Use a stride different from width to avoid potential storing errors that + // would go undetected. The input buffer is filled using a padding of 12, so + // the stride can be anywhere between width and width + 12. 
+ av1_convolve_y_sr_intrabc_c(input, width + 2, reference, kOutputStride, + width, height, filter_params_y, kSubY); + + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + GetParam().TestFunction()(input, width + 2, test, kOutputStride, width, + height, filter_params_y, kSubY); + + AssertOutputBufferEq(reference, test, width, height); + } + + void SpeedTest() { + constexpr int kNumIters = 10000; + const InterpFilter filter = static_cast<InterpFilter>(BILINEAR); + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + + const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params; + const uint8_t *input = FirstRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < kNumIters; ++i) { + av1_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride, width, + height, filter_params_y, 0); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + convolve_y_func test_func = GetParam().TestFunction(); + aom_usec_timer_start(&timer); + for (int i = 0; i < kNumIters; ++i) { + test_func(input, width, test, kOutputStride, width, height, + filter_params_y, 0); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, + time2, time1 / time2); + } +}; + +TEST_P(AV1ConvolveYIntraBCTest, RunTest) { RunTest(); } + +TEST_P(AV1ConvolveYIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYIntraBCTest, + BuildLowbdParams(av1_convolve_y_sr_intrabc_c)); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYIntraBCTest, + BuildLowbdParams(av1_convolve_y_sr_intrabc_neon)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH 
+///////////////////////////////////////////////////////// +// Single reference convolve-y functions (high bit-depth) +///////////////////////////////////////////////////////// +typedef void (*highbd_convolve_y_func)( + const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, + int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn, + int bd); + +class AV1ConvolveYHighbdTest : public AV1ConvolveTest<highbd_convolve_y_func> { + public: + void RunTest() { + for (int sub_y = 0; sub_y < 16; ++sub_y) { + for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; + ++filter) { + InterpFilter f = static_cast<InterpFilter>(filter); + TestConvolve(sub_y, f); + } + } + } + + public: + void SpeedTest() { + for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; + ++filter) { + InterpFilter f = static_cast<InterpFilter>(filter); + TestConvolveSpeed(f, 10000); + } + } + + private: + void TestConvolve(const int sub_y, const InterpFilter filter) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_y = + av1_get_interp_filter_params_with_block_size(filter, height); + const uint16_t *input = FirstRandomInput16(GetParam()); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width, + height, filter_params_y, sub_y, bit_depth); + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + GetParam().TestFunction()(input, width, test, kOutputStride, width, height, + filter_params_y, sub_y, bit_depth); + AssertOutputBufferEq(reference, test, width, height); + } + + private: + void TestConvolveSpeed(const InterpFilter filter, const int num_iters) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_y 
= + av1_get_interp_filter_params_with_block_size(filter, width); + const uint16_t *input = FirstRandomInput16(GetParam()); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width, + height, filter_params_y, 0, bit_depth); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + highbd_convolve_y_func test_func = GetParam().TestFunction(); + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + test_func(input, width, test, kOutputStride, width, height, + filter_params_y, 0, bit_depth); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, + time2, time1 / time2); + } +}; + +TEST_P(AV1ConvolveYHighbdTest, RunTest) { RunTest(); } + +TEST_P(AV1ConvolveYHighbdTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdTest, + BuildHighbdParams(av1_highbd_convolve_y_sr_c)); + +#if HAVE_SSSE3 +INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveYHighbdTest, + BuildHighbdParams(av1_highbd_convolve_y_sr_ssse3)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYHighbdTest, + BuildHighbdParams(av1_highbd_convolve_y_sr_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYHighbdTest, + BuildHighbdParams(av1_highbd_convolve_y_sr_neon)); +#endif + +///////////////////////////////////////////////////////////////// +// Single reference convolve-y IntraBC functions (high bit-depth) +///////////////////////////////////////////////////////////////// + +class AV1ConvolveYHighbdIntraBCTest + : public AV1ConvolveTest<highbd_convolve_y_func> { + public: + void RunTest() { + // IntraBC 
functions only operate for subpel_y_qn = 8. + constexpr int kSubY = 8; + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params; + const uint16_t *input = FirstRandomInput16(GetParam()); + + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + // Use a stride different from width to avoid potential storing errors that + // would go undetected. The input buffer is filled using a padding of 12, so + // the stride can be anywhere between width and width + 12. + av1_highbd_convolve_y_sr_intrabc_c(input, width + 2, reference, + kOutputStride, width, height, + filter_params_y, kSubY, bit_depth); + + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + GetParam().TestFunction()(input, width + 2, test, kOutputStride, width, + height, filter_params_y, kSubY, bit_depth); + + AssertOutputBufferEq(reference, test, width, height); + } + + void SpeedTest() { + constexpr int kNumIters = 10000; + const InterpFilter filter = static_cast<InterpFilter>(BILINEAR); + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_y = + av1_get_interp_filter_params_with_block_size(filter, width); + const uint16_t *input = FirstRandomInput16(GetParam()); + + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < kNumIters; ++i) { + av1_highbd_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride, + width, height, filter_params_y, 0, + bit_depth); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + highbd_convolve_y_func test_func = GetParam().TestFunction(); + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + aom_usec_timer_start(&timer); + for (int i 
= 0; i < kNumIters; ++i) { + test_func(input, width, test, kOutputStride, width, height, + filter_params_y, 0, bit_depth); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, + time2, time1 / time2); + } +}; + +TEST_P(AV1ConvolveYHighbdIntraBCTest, RunTest) { RunTest(); } + +TEST_P(AV1ConvolveYHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdIntraBCTest, + BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_c)); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1ConvolveYHighbdIntraBCTest, + BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_neon)); +#endif + +#endif // CONFIG_AV1_HIGHBITDEPTH + +////////////////////////////////////////////////////////////// +// Single reference convolve-copy functions (low bit-depth) +////////////////////////////////////////////////////////////// +typedef void (*convolve_copy_func)(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, int w, + int h); + +class AV1ConvolveCopyTest : public AV1ConvolveTest<convolve_copy_func> { + public: + void RunTest() { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const uint8_t *input = FirstRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + aom_convolve_copy_c(input, width, reference, kOutputStride, width, height); + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + GetParam().TestFunction()(input, width, test, kOutputStride, width, height); + AssertOutputBufferEq(reference, test, width, height); + } +}; + +// Note that even though these are AOM convolve functions, we are using the +// newer AV1 test framework. 
+TEST_P(AV1ConvolveCopyTest, RunTest) { RunTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyTest, + BuildLowbdParams(aom_convolve_copy_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyTest, + BuildLowbdParams(aom_convolve_copy_sse2)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyTest, + BuildLowbdParams(aom_convolve_copy_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyTest, + BuildLowbdParams(aom_convolve_copy_neon)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +/////////////////////////////////////////////////////////////// +// Single reference convolve-copy functions (high bit-depth) +/////////////////////////////////////////////////////////////// +typedef void (*highbd_convolve_copy_func)(const uint16_t *src, + ptrdiff_t src_stride, uint16_t *dst, + ptrdiff_t dst_stride, int w, int h); + +class AV1ConvolveCopyHighbdTest + : public AV1ConvolveTest<highbd_convolve_copy_func> { + public: + void RunTest() { + const BlockSize &block = GetParam().Block(); + const int width = block.Width(); + const int height = block.Height(); + const uint16_t *input = FirstRandomInput16(GetParam()); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + aom_highbd_convolve_copy_c(input, width, reference, kOutputStride, width, + height); + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + GetParam().TestFunction()(input, width, test, kOutputStride, width, height); + AssertOutputBufferEq(reference, test, width, height); + } +}; + +TEST_P(AV1ConvolveCopyHighbdTest, RunTest) { RunTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyHighbdTest, + BuildHighbdParams(aom_highbd_convolve_copy_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyHighbdTest, + BuildHighbdParams(aom_highbd_convolve_copy_sse2)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyHighbdTest, + BuildHighbdParams(aom_highbd_convolve_copy_avx2)); +#endif + +#if HAVE_NEON 
+INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyHighbdTest, + BuildHighbdParams(aom_highbd_convolve_copy_neon)); +#endif + +#endif // CONFIG_AV1_HIGHBITDEPTH + +///////////////////////////////////////////////////////// +// Single reference convolve-2D functions (low bit-depth) +///////////////////////////////////////////////////////// +typedef void (*convolve_2d_func)(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int w, int h, + const InterpFilterParams *filter_params_x, + const InterpFilterParams *filter_params_y, + const int subpel_x_qn, const int subpel_y_qn, + ConvolveParams *conv_params); + +class AV1Convolve2DTest : public AV1ConvolveTest<convolve_2d_func> { + public: + void RunTest() { + for (int sub_x = 0; sub_x < 16; ++sub_x) { + for (int sub_y = 0; sub_y < 16; ++sub_y) { + for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) { + for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) { + if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) || + ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2))) + continue; + TestConvolve(static_cast<InterpFilter>(h_f), + static_cast<InterpFilter>(v_f), sub_x, sub_y); + } + } + } + } + } + + public: + void SpeedTest() { + for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) { + for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) { + if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) || + ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2))) + continue; + TestConvolveSpeed(static_cast<InterpFilter>(h_f), + static_cast<InterpFilter>(v_f), 10000); + } + } + } + + private: + void TestConvolve(const InterpFilter h_f, const InterpFilter v_f, + const int sub_x, const int sub_y) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const InterpFilterParams *filter_params_x = + av1_get_interp_filter_params_with_block_size(h_f, width); + const InterpFilterParams *filter_params_y = + 
av1_get_interp_filter_params_with_block_size(v_f, height); + const uint8_t *input = FirstRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width, height, + filter_params_x, filter_params_y, sub_x, sub_y, + &conv_params1); + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + GetParam().TestFunction()(input, width, test, kOutputStride, width, height, + filter_params_x, filter_params_y, sub_x, sub_y, + &conv_params2); + AssertOutputBufferEq(reference, test, width, height); + } + + private: + void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f, + int num_iters) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const InterpFilterParams *filter_params_x = + av1_get_interp_filter_params_with_block_size(h_f, width); + const InterpFilterParams *filter_params_y = + av1_get_interp_filter_params_with_block_size(v_f, height); + const uint8_t *input = FirstRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width, + height, filter_params_x, filter_params_y, 0, 0, + &conv_params1); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + GetParam().TestFunction()(input, width, test, kOutputStride, width, + height, 
filter_params_x, filter_params_y, 0, 0, + &conv_params2); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height, + time1, time2, time1 / time2); + } +}; + +TEST_P(AV1Convolve2DTest, RunTest) { RunTest(); } + +TEST_P(AV1Convolve2DTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DTest, + BuildLowbdParams(av1_convolve_2d_sr_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, AV1Convolve2DTest, + BuildLowbdParams(av1_convolve_2d_sr_sse2)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DTest, + BuildLowbdParams(av1_convolve_2d_sr_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DTest, + BuildLowbdParams(av1_convolve_2d_sr_neon)); +#endif + +#if HAVE_NEON_DOTPROD +INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1Convolve2DTest, + BuildLowbdParams(av1_convolve_2d_sr_neon_dotprod)); +#endif + +#if HAVE_NEON_I8MM +INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1Convolve2DTest, + BuildLowbdParams(av1_convolve_2d_sr_neon_i8mm)); +#endif + +///////////////////////////////////////////////////////////////// +// Single reference convolve-2D IntraBC functions (low bit-depth) +///////////////////////////////////////////////////////////////// + +class AV1Convolve2DIntraBCTest : public AV1ConvolveTest<convolve_2d_func> { + public: + void RunTest() { + // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8. 
+ constexpr int kSubX = 8; + constexpr int kSubY = 8; + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; + const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params; + const uint8_t *input = FirstRandomInput8(GetParam()); + + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + // Use a stride different from width to avoid potential storing errors that + // would go undetected. The input buffer is filled using a padding of 12, so + // the stride can be anywhere between width and width + 12. + av1_convolve_2d_sr_intrabc_c(input, width + 2, reference, kOutputStride, + width, height, filter_params_x, + filter_params_y, kSubX, kSubY, &conv_params1); + + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + GetParam().TestFunction()(input, width + 2, test, kOutputStride, width, + height, filter_params_x, filter_params_y, kSubX, + kSubY, &conv_params2); + + AssertOutputBufferEq(reference, test, width, height); + } + + void SpeedTest() { + constexpr int kNumIters = 10000; + const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR); + const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR); + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; + const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params; + const uint8_t *input = FirstRandomInput8(GetParam()); + + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < kNumIters; ++i) { + av1_convolve_2d_sr_intrabc_c(input, 
width, reference, kOutputStride, + width, height, filter_params_x, + filter_params_y, 8, 8, &conv_params1); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + convolve_2d_func test_func = GetParam().TestFunction(); + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + aom_usec_timer_start(&timer); + for (int i = 0; i < kNumIters; ++i) { + test_func(input, width, test, kOutputStride, width, height, + filter_params_x, filter_params_y, 8, 8, &conv_params2); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height, + time1, time2, time1 / time2); + } +}; + +TEST_P(AV1Convolve2DIntraBCTest, RunTest) { RunTest(); } + +TEST_P(AV1Convolve2DIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DIntraBCTest, + BuildLowbdParams(av1_convolve_2d_sr_intrabc_c)); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DIntraBCTest, + BuildLowbdParams(av1_convolve_2d_sr_intrabc_neon)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +////////////////////////////////////////////////////////// +// Single reference convolve-2d functions (high bit-depth) +////////////////////////////////////////////////////////// + +typedef void (*highbd_convolve_2d_func)( + const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, + int h, const InterpFilterParams *filter_params_x, + const InterpFilterParams *filter_params_y, const int subpel_x_qn, + const int subpel_y_qn, ConvolveParams *conv_params, int bd); + +class AV1Convolve2DHighbdTest + : public AV1ConvolveTest<highbd_convolve_2d_func> { + public: + void RunTest() { + for (int sub_x = 0; sub_x < 16; ++sub_x) { + for (int sub_y = 0; sub_y < 16; ++sub_y) { + for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; 
++h_f) { + for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) { + if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) || + ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2))) + continue; + TestConvolve(static_cast<InterpFilter>(h_f), + static_cast<InterpFilter>(v_f), sub_x, sub_y); + } + } + } + } + } + + public: + void SpeedTest() { + for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) { + for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) { + if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) || + ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2))) + continue; + TestConvolveSpeed(static_cast<InterpFilter>(h_f), + static_cast<InterpFilter>(v_f), 10000); + } + } + } + + private: + void TestConvolve(const InterpFilter h_f, const InterpFilter v_f, + const int sub_x, const int sub_y) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_x = + av1_get_interp_filter_params_with_block_size(h_f, width); + const InterpFilterParams *filter_params_y = + av1_get_interp_filter_params_with_block_size(v_f, height); + const uint16_t *input = FirstRandomInput16(GetParam()); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); + av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width, + height, filter_params_x, filter_params_y, sub_x, + sub_y, &conv_params1, bit_depth); + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); + GetParam().TestFunction()(input, width, test, kOutputStride, width, height, + filter_params_x, filter_params_y, sub_x, sub_y, + &conv_params2, bit_depth); + AssertOutputBufferEq(reference, test, width, height); + } + + void TestConvolveSpeed(const InterpFilter 
h_f, const InterpFilter v_f, + int num_iters) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_x = + av1_get_interp_filter_params_with_block_size(h_f, width); + const InterpFilterParams *filter_params_y = + av1_get_interp_filter_params_with_block_size(v_f, height); + const uint16_t *input = FirstRandomInput16(GetParam()); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width, + height, filter_params_x, filter_params_y, 0, + 0, &conv_params1, bit_depth); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + GetParam().TestFunction()(input, width, test, kOutputStride, width, + height, filter_params_x, filter_params_y, 0, 0, + &conv_params2, bit_depth); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height, + time1, time2, time1 / time2); + } +}; + +TEST_P(AV1Convolve2DHighbdTest, RunTest) { RunTest(); } + +TEST_P(AV1Convolve2DHighbdTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DHighbdTest, + BuildHighbdParams(av1_highbd_convolve_2d_sr_c)); + +#if HAVE_SSSE3 +INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DHighbdTest, + BuildHighbdParams(av1_highbd_convolve_2d_sr_ssse3)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, 
AV1Convolve2DHighbdTest, + BuildHighbdParams(av1_highbd_convolve_2d_sr_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DHighbdTest, + BuildHighbdParams(av1_highbd_convolve_2d_sr_neon)); +#endif + +////////////////////////////////////////////////////////////////// +// Single reference convolve-2d IntraBC functions (high bit-depth) +////////////////////////////////////////////////////////////////// + +class AV1Convolve2DHighbdIntraBCTest + : public AV1ConvolveTest<highbd_convolve_2d_func> { + public: + void RunTest() { + // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8. + constexpr int kSubX = 8; + constexpr int kSubY = 8; + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; + const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params; + const uint16_t *input = FirstRandomInput16(GetParam()); + + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); + // Use a stride different from width to avoid potential storing errors that + // would go undetected. The input buffer is filled using a padding of 12, so + // the stride can be anywhere between width and width + 12. 
+ av1_highbd_convolve_2d_sr_intrabc_c(input, width + 2, reference, + kOutputStride, width, height, + filter_params_x, filter_params_y, kSubX, + kSubY, &conv_params1, bit_depth); + + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); + GetParam().TestFunction()(input, width + 2, test, kOutputStride, width, + height, filter_params_x, filter_params_y, kSubX, + kSubY, &conv_params2, bit_depth); + + AssertOutputBufferEq(reference, test, width, height); + } + + void SpeedTest() { + constexpr int kNumIters = 10000; + const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR); + const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR); + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params_x = + av1_get_interp_filter_params_with_block_size(h_f, width); + const InterpFilterParams *filter_params_y = + av1_get_interp_filter_params_with_block_size(v_f, height); + const uint16_t *input = FirstRandomInput16(GetParam()); + + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + ConvolveParams conv_params1 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < kNumIters; ++i) { + av1_highbd_convolve_2d_sr_intrabc_c( + input, width, reference, kOutputStride, width, height, + filter_params_x, filter_params_y, 0, 0, &conv_params1, bit_depth); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + highbd_convolve_2d_func test_func = GetParam().TestFunction(); + ConvolveParams conv_params2 = + get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); + aom_usec_timer_start(&timer); + for (int i = 0; i < kNumIters; ++i) { + test_func(input, width, test, kOutputStride, width, 
height, + filter_params_x, filter_params_y, 0, 0, &conv_params2, + bit_depth); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height, + time1, time2, time1 / time2); + } +}; + +TEST_P(AV1Convolve2DHighbdIntraBCTest, RunTest) { RunTest(); } + +TEST_P(AV1Convolve2DHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P( + C, AV1Convolve2DHighbdIntraBCTest, + BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_c)); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1Convolve2DHighbdIntraBCTest, + BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_neon)); +#endif + +#endif // CONFIG_AV1_HIGHBITDEPTH + +////////////////////////// +// Compound Convolve Tests +////////////////////////// + +// The compound functions do not work for chroma block sizes. Provide +// a function to generate test parameters for just luma block sizes. +template <typename T> +std::vector<TestParam<T>> GetLumaTestParams( + std::initializer_list<int> bit_depths, T test_func) { + std::set<BlockSize> sizes; + for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) { + const int w = block_size_wide[b]; + const int h = block_size_high[b]; + sizes.insert(BlockSize(w, h)); + } + std::vector<TestParam<T>> result; + for (int bit_depth : bit_depths) { + for (const auto &block : sizes) { + result.push_back(TestParam<T>(block, bit_depth, test_func)); + } + } + return result; +} + +template <typename T> +std::vector<TestParam<T>> GetLowbdLumaTestParams(T test_func) { + return GetLumaTestParams({ 8 }, test_func); +} + +template <typename T> +::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdLumaParams( + T test_func) { + return ::testing::ValuesIn(GetLowbdLumaTestParams(test_func)); +} + +TEST_F(AV1ConvolveParametersTest, GetLowbdLumaTestParams) { + auto v = GetLowbdLumaTestParams(av1_dist_wtd_convolve_x_c); + ASSERT_EQ(22U, v.size()); + for 
(const auto &e : v) { + ASSERT_EQ(8, e.BitDepth()); + bool same_fn = av1_dist_wtd_convolve_x_c == e.TestFunction(); + ASSERT_TRUE(same_fn); + } +} + +#if CONFIG_AV1_HIGHBITDEPTH +template <typename T> +std::vector<TestParam<T>> GetHighbdLumaTestParams(T test_func) { + return GetLumaTestParams({ 10, 12 }, test_func); +} + +TEST_F(AV1ConvolveParametersTest, GetHighbdLumaTestParams) { + auto v = GetHighbdLumaTestParams(av1_highbd_dist_wtd_convolve_x_c); + ASSERT_EQ(44U, v.size()); + int num_10 = 0; + int num_12 = 0; + for (const auto &e : v) { + ASSERT_TRUE(10 == e.BitDepth() || 12 == e.BitDepth()); + bool same_fn = av1_highbd_dist_wtd_convolve_x_c == e.TestFunction(); + ASSERT_TRUE(same_fn); + if (e.BitDepth() == 10) { + ++num_10; + } else { + ++num_12; + } + } + ASSERT_EQ(num_10, num_12); +} + +template <typename T> +::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdLumaParams( + T test_func) { + return ::testing::ValuesIn(GetHighbdLumaTestParams(test_func)); +} + +#endif // CONFIG_AV1_HIGHBITDEPTH + +// Compound cases also need to test different frame offsets and weightings. 
+class CompoundParam { + public: + CompoundParam(bool use_dist_wtd_comp_avg, int fwd_offset, int bck_offset) + : use_dist_wtd_comp_avg_(use_dist_wtd_comp_avg), fwd_offset_(fwd_offset), + bck_offset_(bck_offset) {} + + bool UseDistWtdCompAvg() const { return use_dist_wtd_comp_avg_; } + int FwdOffset() const { return fwd_offset_; } + int BckOffset() const { return bck_offset_; } + + private: + bool use_dist_wtd_comp_avg_; + int fwd_offset_; + int bck_offset_; +}; + +std::vector<CompoundParam> GetCompoundParams() { + std::vector<CompoundParam> result; + result.push_back(CompoundParam(false, 0, 0)); + for (int k = 0; k < 2; ++k) { + for (int l = 0; l < 4; ++l) { + result.push_back(CompoundParam(true, quant_dist_lookup_table[l][k], + quant_dist_lookup_table[l][1 - k])); + } + } + return result; +} + +TEST_F(AV1ConvolveParametersTest, GetCompoundParams) { + auto v = GetCompoundParams(); + ASSERT_EQ(9U, v.size()); + ASSERT_FALSE(v[0].UseDistWtdCompAvg()); + for (size_t i = 1; i < v.size(); ++i) { + ASSERT_TRUE(v[i].UseDistWtdCompAvg()); + } +} + +//////////////////////////////////////////////// +// Compound convolve-x functions (low bit-depth) +//////////////////////////////////////////////// + +ConvolveParams GetConvolveParams(int do_average, CONV_BUF_TYPE *conv_buf, + int width, int bit_depth, + const CompoundParam &compound) { + ConvolveParams conv_params = + get_conv_params_no_round(do_average, 0, conv_buf, width, 1, bit_depth); + conv_params.use_dist_wtd_comp_avg = compound.UseDistWtdCompAvg(); + conv_params.fwd_offset = compound.FwdOffset(); + conv_params.bck_offset = compound.BckOffset(); + return conv_params; +} + +class AV1ConvolveXCompoundTest : public AV1ConvolveTest<convolve_x_func> { + public: + void RunTest() { + auto compound_params = GetCompoundParams(); + for (int sub_pix = 0; sub_pix < 16; ++sub_pix) { + for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) { + for (const auto &c : compound_params) { + TestConvolve(sub_pix, 
static_cast<InterpFilter>(f), c); + } + } + } + } + + protected: + virtual const InterpFilterParams *FilterParams(InterpFilter f, + const BlockSize &block) const { + return av1_get_interp_filter_params_with_block_size(f, block.Width()); + } + + virtual convolve_x_func ReferenceFunc() const { + return av1_dist_wtd_convolve_x_c; + } + + private: + void TestConvolve(const int sub_pix, const InterpFilter filter, + const CompoundParam &compound) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const uint8_t *input1 = FirstRandomInput8(GetParam()); + const uint8_t *input2 = SecondRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); + Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf, + compound, sub_pix, filter); + + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); + Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, + compound, sub_pix, filter); + + AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height); + AssertOutputBufferEq(reference, test, width, height); + } + + private: + void Convolve(convolve_x_func test_func, const uint8_t *src1, + const uint8_t *src2, uint8_t *dst, CONV_BUF_TYPE *conv_buf, + const CompoundParam &compound, const int sub_pix, + const InterpFilter filter) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const InterpFilterParams *filter_params = + FilterParams(filter, GetParam().Block()); + + ConvolveParams conv_params = + GetConvolveParams(0, conv_buf, kOutputStride, 8, compound); + test_func(src1, width, dst, kOutputStride, width, height, filter_params, + sub_pix, &conv_params); + + conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound); + test_func(src2, width, dst, kOutputStride, width, 
height, filter_params, + sub_pix, &conv_params); + } +}; + +TEST_P(AV1ConvolveXCompoundTest, RunTest) { RunTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_x_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_x_sse2)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_x_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon)); +#endif + +#if HAVE_NEON_DOTPROD +INSTANTIATE_TEST_SUITE_P( + NEON_DOTPROD, AV1ConvolveXCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_dotprod)); +#endif + +#if HAVE_NEON_I8MM +INSTANTIATE_TEST_SUITE_P( + NEON_I8MM, AV1ConvolveXCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_i8mm)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +///////////////////////////////////////////////// +// Compound convolve-x functions (high bit-depth) +///////////////////////////////////////////////// +class AV1ConvolveXHighbdCompoundTest + : public AV1ConvolveTest<highbd_convolve_x_func> { + public: + void RunTest() { + auto compound_params = GetCompoundParams(); + for (int sub_pix = 0; sub_pix < 16; ++sub_pix) { + for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) { + for (const auto &c : compound_params) { + TestConvolve(sub_pix, static_cast<InterpFilter>(f), c); + } + } + } + } + + protected: + virtual const InterpFilterParams *FilterParams(InterpFilter f, + const BlockSize &block) const { + return av1_get_interp_filter_params_with_block_size(f, block.Width()); + } + + virtual highbd_convolve_x_func ReferenceFunc() const { + return av1_highbd_dist_wtd_convolve_x_c; + } + + private: + void TestConvolve(const int sub_pix, const InterpFilter filter, + const CompoundParam &compound) { + const int width = 
GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + + const uint16_t *input1 = FirstRandomInput16(GetParam()); + const uint16_t *input2 = SecondRandomInput16(GetParam()); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); + Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf, + compound, sub_pix, filter); + + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); + Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, + compound, sub_pix, filter); + + AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height); + AssertOutputBufferEq(reference, test, width, height); + } + + void Convolve(highbd_convolve_x_func test_func, const uint16_t *src1, + const uint16_t *src2, uint16_t *dst, CONV_BUF_TYPE *conv_buf, + const CompoundParam &compound, const int sub_pix, + const InterpFilter filter) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + const int bit_depth = GetParam().BitDepth(); + const InterpFilterParams *filter_params = + FilterParams(filter, GetParam().Block()); + ConvolveParams conv_params = + GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound); + test_func(src1, width, dst, kOutputStride, width, height, filter_params, + sub_pix, &conv_params, bit_depth); + conv_params = + GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound); + test_func(src2, width, dst, kOutputStride, width, height, filter_params, + sub_pix, &conv_params, bit_depth); + } +}; + +TEST_P(AV1ConvolveXHighbdCompoundTest, RunTest) { RunTest(); } + +INSTANTIATE_TEST_SUITE_P( + C, AV1ConvolveXHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_c)); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1ConvolveXHighbdCompoundTest, + 
BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sse4_1)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1ConvolveXHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1ConvolveXHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_neon)); +#endif + +#endif // CONFIG_AV1_HIGHBITDEPTH + +//////////////////////////////////////////////// +// Compound convolve-y functions (low bit-depth) +//////////////////////////////////////////////// + +// Note that the X and Y convolve functions have the same type signature and +// logic; they only differentiate the filter parameters and reference function. +class AV1ConvolveYCompoundTest : public AV1ConvolveXCompoundTest { + protected: + const InterpFilterParams *FilterParams( + InterpFilter f, const BlockSize &block) const override { + return av1_get_interp_filter_params_with_block_size(f, block.Height()); + } + + convolve_x_func ReferenceFunc() const override { + return av1_dist_wtd_convolve_y_c; + } +}; + +TEST_P(AV1ConvolveYCompoundTest, RunTest) { RunTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_y_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_y_sse2)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_y_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_y_neon)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +///////////////////////////////////////////////// +// Compound convolve-y functions (high bit-depth) +///////////////////////////////////////////////// + +// Again, the X and Y convolve functions have the same type signature and logic. 
+class AV1ConvolveYHighbdCompoundTest : public AV1ConvolveXHighbdCompoundTest { + highbd_convolve_x_func ReferenceFunc() const override { + return av1_highbd_dist_wtd_convolve_y_c; + } + const InterpFilterParams *FilterParams( + InterpFilter f, const BlockSize &block) const override { + return av1_get_interp_filter_params_with_block_size(f, block.Height()); + } +}; + +TEST_P(AV1ConvolveYHighbdCompoundTest, RunTest) { RunTest(); } + +INSTANTIATE_TEST_SUITE_P( + C, AV1ConvolveYHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_c)); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1ConvolveYHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sse4_1)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1ConvolveYHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1ConvolveYHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_neon)); +#endif + +#endif // CONFIG_AV1_HIGHBITDEPTH + +////////////////////////////////////////////////////// +// Compound convolve-2d-copy functions (low bit-depth) +////////////////////////////////////////////////////// +typedef void (*compound_conv_2d_copy_func)(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int w, + int h, ConvolveParams *conv_params); + +class AV1Convolve2DCopyCompoundTest + : public AV1ConvolveTest<compound_conv_2d_copy_func> { + public: + void RunTest() { + auto compound_params = GetCompoundParams(); + for (const auto &compound : compound_params) { + TestConvolve(compound); + } + } + void SpeedTest() { + for (const auto &compound : GetCompoundParams()) { + TestConvolveSpeed(compound, 100000); + } + } + + private: + void TestConvolve(const CompoundParam &compound) { + const BlockSize &block = GetParam().Block(); + const int width = block.Width(); + const int height = block.Height(); + + const uint8_t *input1 = 
FirstRandomInput8(GetParam()); + const uint8_t *input2 = SecondRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); + Convolve(av1_dist_wtd_convolve_2d_copy_c, input1, input2, reference, + reference_conv_buf, compound); + + DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); + Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, + compound); + + AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height); + AssertOutputBufferEq(reference, test, width, height); + } + + void TestConvolveSpeed(const CompoundParam &compound, const int num_iters) { + const int width = GetParam().Block().Width(); + const int height = GetParam().Block().Height(); + + const uint8_t *src0 = FirstRandomInput8(GetParam()); + const uint8_t *src1 = SecondRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, dst[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, conv_buf[MAX_SB_SQUARE]); + + const auto test_func = GetParam().TestFunction(); + + ConvolveParams conv_params_0 = + GetConvolveParams(0, conv_buf, kOutputStride, 8, compound); + ConvolveParams conv_params_1 = + GetConvolveParams(1, conv_buf, kOutputStride, 8, compound); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + av1_dist_wtd_convolve_2d_copy_c(src0, width, dst, kOutputStride, width, + height, &conv_params_0); + av1_dist_wtd_convolve_2d_copy_c(src1, width, dst, kOutputStride, width, + height, &conv_params_1); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + aom_usec_timer_start(&timer); + for (int i = 0; i < num_iters; ++i) { + test_func(src0, width, dst, kOutputStride, width, height, &conv_params_0); + test_func(src1, width, dst, kOutputStride, width, height, &conv_params_1); + } + aom_usec_timer_mark(&timer); + 
const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + printf("Dist Weighted: %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", + compound.UseDistWtdCompAvg(), width, height, time1, time2, + time1 / time2); + } + + void Convolve(compound_conv_2d_copy_func test_func, const uint8_t *src1, + const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf, + const CompoundParam &compound) { + const BlockSize &block = GetParam().Block(); + const int width = block.Width(); + const int height = block.Height(); + ConvolveParams conv_params = + GetConvolveParams(0, conv_buf, kOutputStride, 8, compound); + test_func(src1, width, dst, kOutputStride, width, height, &conv_params); + + conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound); + test_func(src2, width, dst, kOutputStride, width, height, &conv_params); + } +}; + +TEST_P(AV1Convolve2DCopyCompoundTest, RunTest) { RunTest(); } +TEST_P(AV1Convolve2DCopyCompoundTest, DISABLED_SpeedTest) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DCopyCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, AV1Convolve2DCopyCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_sse2)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1Convolve2DCopyCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1Convolve2DCopyCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_neon)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +/////////////////////////////////////////////////////// +// Compound convolve-2d-copy functions (high bit-depth) +/////////////////////////////////////////////////////// +typedef void (*highbd_compound_conv_2d_copy_func)(const uint16_t *src, + int src_stride, uint16_t *dst, + int dst_stride, int w, int h, + ConvolveParams *conv_params, + int bd); + +class AV1Convolve2DCopyHighbdCompoundTest + : public 
AV1ConvolveTest<highbd_compound_conv_2d_copy_func> { + public: + void RunTest() { + auto compound_params = GetCompoundParams(); + for (const auto &compound : compound_params) { + TestConvolve(compound); + } + } + + private: + void TestConvolve(const CompoundParam &compound) { + const BlockSize &block = GetParam().Block(); + const int width = block.Width(); + const int height = block.Height(); + + const uint16_t *input1 = FirstRandomInput16(GetParam()); + const uint16_t *input2 = SecondRandomInput16(GetParam()); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); + Convolve(av1_highbd_dist_wtd_convolve_2d_copy_c, input1, input2, reference, + reference_conv_buf, compound); + + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); + Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, + compound); + + AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height); + AssertOutputBufferEq(reference, test, width, height); + } + + void Convolve(highbd_compound_conv_2d_copy_func test_func, + const uint16_t *src1, const uint16_t *src2, uint16_t *dst, + uint16_t *conv_buf, const CompoundParam &compound) { + const BlockSize &block = GetParam().Block(); + const int width = block.Width(); + const int height = block.Height(); + const int bit_depth = GetParam().BitDepth(); + + ConvolveParams conv_params = + GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound); + test_func(src1, width, dst, kOutputStride, width, height, &conv_params, + bit_depth); + + conv_params = + GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound); + test_func(src2, width, dst, kOutputStride, width, height, &conv_params, + bit_depth); + } +}; + +TEST_P(AV1Convolve2DCopyHighbdCompoundTest, RunTest) { RunTest(); } + +INSTANTIATE_TEST_SUITE_P( + C, AV1Convolve2DCopyHighbdCompoundTest, + 
BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_c)); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1Convolve2DCopyHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_sse4_1)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1Convolve2DCopyHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1Convolve2DCopyHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_neon)); +#endif + +#endif // CONFIG_AV1_HIGHBITDEPTH + +///////////////////////////////////////////////// +// Compound convolve-2d functions (low bit-depth) +///////////////////////////////////////////////// + +class AV1Convolve2DCompoundTest : public AV1ConvolveTest<convolve_2d_func> { + public: + void RunTest() { + auto compound_params = GetCompoundParams(); + for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) { + for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) { + for (int sub_x = 0; sub_x < 16; ++sub_x) { + for (int sub_y = 0; sub_y < 16; ++sub_y) { + for (const auto &compound : compound_params) { + TestConvolve(static_cast<InterpFilter>(h_f), + static_cast<InterpFilter>(v_f), sub_x, sub_y, + compound); + } + } + } + } + } + } + + private: + void TestConvolve(const InterpFilter h_f, const InterpFilter v_f, + const int sub_x, const int sub_y, + const CompoundParam &compound) { + const BlockSize &block = GetParam().Block(); + const int width = block.Width(); + const int height = block.Height(); + + const uint8_t *input1 = FirstRandomInput8(GetParam()); + const uint8_t *input2 = SecondRandomInput8(GetParam()); + DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); + Convolve(av1_dist_wtd_convolve_2d_c, input1, input2, reference, + reference_conv_buf, compound, h_f, v_f, sub_x, sub_y); + + DECLARE_ALIGNED(32, 
uint8_t, test[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); + Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, + compound, h_f, v_f, sub_x, sub_y); + + AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height); + AssertOutputBufferEq(reference, test, width, height); + } + + private: + void Convolve(convolve_2d_func test_func, const uint8_t *src1, + const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf, + const CompoundParam &compound, const InterpFilter h_f, + const InterpFilter v_f, const int sub_x, const int sub_y) { + const BlockSize &block = GetParam().Block(); + const int width = block.Width(); + const int height = block.Height(); + + const InterpFilterParams *filter_params_x = + av1_get_interp_filter_params_with_block_size(h_f, width); + const InterpFilterParams *filter_params_y = + av1_get_interp_filter_params_with_block_size(v_f, height); + ConvolveParams conv_params = + GetConvolveParams(0, conv_buf, kOutputStride, 8, compound); + + test_func(src1, width, dst, kOutputStride, width, height, filter_params_x, + filter_params_y, sub_x, sub_y, &conv_params); + + conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound); + test_func(src2, width, dst, kOutputStride, width, height, filter_params_x, + filter_params_y, sub_x, sub_y, &conv_params); + } +}; + +TEST_P(AV1Convolve2DCompoundTest, RunTest) { RunTest(); } + +INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_c)); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, AV1Convolve2DCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_sse2)); +#endif + +#if HAVE_SSSE3 +INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_ssse3)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_avx2)); +#endif + +#if HAVE_NEON 
+INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon)); +#endif + +#if HAVE_NEON_DOTPROD +INSTANTIATE_TEST_SUITE_P( + NEON_DOTPROD, AV1Convolve2DCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_dotprod)); +#endif + +#if HAVE_NEON_I8MM +INSTANTIATE_TEST_SUITE_P( + NEON_I8MM, AV1Convolve2DCompoundTest, + BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_i8mm)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +////////////////////////////////////////////////// +// Compound convolve-2d functions (high bit-depth) +////////////////////////////////////////////////// + +class AV1Convolve2DHighbdCompoundTest + : public AV1ConvolveTest<highbd_convolve_2d_func> { + public: + void RunTest() { + auto compound_params = GetCompoundParams(); + for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) { + for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) { + for (int sub_x = 0; sub_x < 16; ++sub_x) { + for (int sub_y = 0; sub_y < 16; ++sub_y) { + for (const auto &compound : compound_params) { + TestConvolve(static_cast<InterpFilter>(h_f), + static_cast<InterpFilter>(v_f), sub_x, sub_y, + compound); + } + } + } + } + } + } + + private: + void TestConvolve(const InterpFilter h_f, const InterpFilter v_f, + const int sub_x, const int sub_y, + const CompoundParam &compound) { + const BlockSize &block = GetParam().Block(); + const int width = block.Width(); + const int height = block.Height(); + const uint16_t *input1 = FirstRandomInput16(GetParam()); + const uint16_t *input2 = SecondRandomInput16(GetParam()); + DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); + Convolve(av1_highbd_dist_wtd_convolve_2d_c, input1, input2, reference, + reference_conv_buf, compound, h_f, v_f, sub_x, sub_y); + + DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); + 
Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, + compound, h_f, v_f, sub_x, sub_y); + + AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height); + AssertOutputBufferEq(reference, test, width, height); + } + + private: + void Convolve(highbd_convolve_2d_func test_func, const uint16_t *src1, + const uint16_t *src2, uint16_t *dst, uint16_t *conv_buf, + const CompoundParam &compound, const InterpFilter h_f, + const InterpFilter v_f, const int sub_x, const int sub_y) { + const BlockSize &block = GetParam().Block(); + const int width = block.Width(); + const int height = block.Height(); + + const InterpFilterParams *filter_params_x = + av1_get_interp_filter_params_with_block_size(h_f, width); + const InterpFilterParams *filter_params_y = + av1_get_interp_filter_params_with_block_size(v_f, height); + const int bit_depth = GetParam().BitDepth(); + ConvolveParams conv_params = + GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound); + test_func(src1, width, dst, kOutputStride, width, height, filter_params_x, + filter_params_y, sub_x, sub_y, &conv_params, bit_depth); + + conv_params = + GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound); + test_func(src2, width, dst, kOutputStride, width, height, filter_params_x, + filter_params_y, sub_x, sub_y, &conv_params, bit_depth); + } +}; + +TEST_P(AV1Convolve2DHighbdCompoundTest, RunTest) { RunTest(); } + +INSTANTIATE_TEST_SUITE_P( + C, AV1Convolve2DHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_c)); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1Convolve2DHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sse4_1)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1Convolve2DHighbdCompoundTest, + BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1Convolve2DHighbdCompoundTest, + 
BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_neon)); +#endif + +#endif // CONFIG_AV1_HIGHBITDEPTH + +} // namespace diff --git a/third_party/aom/test/av1_encoder_parms_get_to_decoder.cc b/third_party/aom/test/av1_encoder_parms_get_to_decoder.cc new file mode 100644 index 0000000000..402e70c34a --- /dev/null +++ b/third_party/aom/test/av1_encoder_parms_get_to_decoder.cc @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <memory> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" + +#include "aom/aom_decoder.h" +#include "av1/decoder/decoder.h" + +namespace { + +const int kMaxPsnr = 100; + +struct ParamPassingTestVideo { + const char *name; + uint32_t width; + uint32_t height; + uint32_t bitrate; + int frames; +}; + +const ParamPassingTestVideo kAV1ParamPassingTestVector = { + "niklas_1280_720_30.y4m", 1280, 720, 600, 3 +}; + +struct EncodeParameters { + int32_t lossless; + aom_color_primaries_t color_primaries; + aom_transfer_characteristics_t transfer_characteristics; + aom_matrix_coefficients_t matrix_coefficients; + aom_color_range_t color_range; + aom_chroma_sample_position_t chroma_sample_position; + int32_t render_size[2]; +}; + +const EncodeParameters kAV1EncodeParameterSet[] = { + { 1, + AOM_CICP_CP_BT_709, + AOM_CICP_TC_BT_709, + AOM_CICP_MC_BT_709, + AOM_CR_STUDIO_RANGE, + 
AOM_CSP_UNKNOWN, + { 0, 0 } }, + { 0, + AOM_CICP_CP_BT_470_M, + AOM_CICP_TC_BT_470_M, + AOM_CICP_MC_BT_470_B_G, + AOM_CR_FULL_RANGE, + AOM_CSP_VERTICAL, + { 0, 0 } }, + { 1, + AOM_CICP_CP_BT_601, + AOM_CICP_TC_BT_601, + AOM_CICP_MC_BT_601, + AOM_CR_STUDIO_RANGE, + AOM_CSP_COLOCATED, + { 0, 0 } }, + { 0, + AOM_CICP_CP_BT_2020, + AOM_CICP_TC_BT_2020_10_BIT, + AOM_CICP_MC_BT_2020_NCL, + AOM_CR_FULL_RANGE, + AOM_CSP_RESERVED, + { 640, 480 } }, +}; + +class AVxEncoderParmsGetToDecoder + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWithParam<EncodeParameters> { + protected: + AVxEncoderParmsGetToDecoder() + : EncoderTest(GET_PARAM(0)), encode_parms(GET_PARAM(1)) {} + + ~AVxEncoderParmsGetToDecoder() override = default; + + void SetUp() override { + InitializeConfig(::libaom_test::kTwoPassGood); + cfg_.g_lag_in_frames = 25; + test_video_ = kAV1ParamPassingTestVector; + cfg_.rc_target_bitrate = test_video_.bitrate; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 3); + encoder->Control(AV1E_SET_COLOR_PRIMARIES, encode_parms.color_primaries); + encoder->Control(AV1E_SET_TRANSFER_CHARACTERISTICS, + encode_parms.transfer_characteristics); + encoder->Control(AV1E_SET_MATRIX_COEFFICIENTS, + encode_parms.matrix_coefficients); + encoder->Control(AV1E_SET_COLOR_RANGE, encode_parms.color_range); + encoder->Control(AV1E_SET_CHROMA_SAMPLE_POSITION, + encode_parms.chroma_sample_position); + encoder->Control(AV1E_SET_LOSSLESS, encode_parms.lossless); + if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) { + encoder->Control(AV1E_SET_RENDER_SIZE, encode_parms.render_size); + } + } + } + + void DecompressedFrameHook(const aom_image_t &img, + aom_codec_pts_t pts) override { + (void)pts; + if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) { + EXPECT_EQ(encode_parms.render_size[0], (int)img.r_w); 
+ EXPECT_EQ(encode_parms.render_size[1], (int)img.r_h); + } + EXPECT_EQ(encode_parms.color_primaries, img.cp); + EXPECT_EQ(encode_parms.transfer_characteristics, img.tc); + EXPECT_EQ(encode_parms.matrix_coefficients, img.mc); + EXPECT_EQ(encode_parms.color_range, img.range); + EXPECT_EQ(encode_parms.chroma_sample_position, img.csp); + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + if (encode_parms.lossless) { + EXPECT_EQ(kMaxPsnr, pkt->data.psnr.psnr[0]); + } + } + + bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) override { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + return AOM_CODEC_OK == res_dec; + } + + ParamPassingTestVideo test_video_; + + private: + EncodeParameters encode_parms; +}; + +TEST_P(AVxEncoderParmsGetToDecoder, BitstreamParms) { + init_flags_ = AOM_CODEC_USE_PSNR; + + std::unique_ptr<libaom_test::VideoSource> video( + new libaom_test::Y4mVideoSource(test_video_.name, 0, test_video_.frames)); + ASSERT_NE(video, nullptr); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); +} + +AV1_INSTANTIATE_TEST_SUITE(AVxEncoderParmsGetToDecoder, + ::testing::ValuesIn(kAV1EncodeParameterSet)); +} // namespace diff --git a/third_party/aom/test/av1_ext_tile_test.cc b/third_party/aom/test/av1_ext_tile_test.cc new file mode 100644 index 0000000000..59c44cad12 --- /dev/null +++ b/third_party/aom/test/av1_ext_tile_test.cc @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <assert.h> +#include <string> +#include <vector> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/md5_helper.h" +#include "test/util.h" + +namespace { +// The number of frames to be encoded/decoded +const int kLimit = 8; +// Skip 1 frame to check the frame decoding independency. +const int kSkip = 5; +const int kTileSize = 1; +const int kTIleSizeInPixels = (kTileSize << 6); +// Fake width and height so that they can be multiples of the tile size. +const int kImgWidth = 704; +const int kImgHeight = 576; + +// This test tests large scale tile coding case. Non-large-scale tile coding +// is tested by the tile_independence test. +class AV1ExtTileTest + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>, + public ::libaom_test::EncoderTest { + protected: + AV1ExtTileTest() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + set_cpu_used_(GET_PARAM(2)) { + init_flags_ = AOM_CODEC_USE_PSNR; + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.w = kImgWidth; + cfg.h = kImgHeight; + cfg.allow_lowbitdepth = 1; + + decoder_ = codec_->CreateDecoder(cfg, 0); + decoder_->Control(AV1_SET_TILE_MODE, 1); + decoder_->Control(AV1D_EXT_TILE_DEBUG, 1); + decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1); + decoder_->Control(AV1_SET_DECODE_TILE_COL, -1); + + // Allocate buffer to store tile image. 
+ aom_img_alloc(&tile_img_, AOM_IMG_FMT_I420, kImgWidth, kImgHeight, 32); + + md5_.clear(); + tile_md5_.clear(); + } + + ~AV1ExtTileTest() override { + aom_img_free(&tile_img_); + delete decoder_; + } + + void SetUp() override { + InitializeConfig(encoding_mode_); + + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = AOM_VBR; + cfg_.g_error_resilient = 1; + + cfg_.rc_max_quantizer = 56; + cfg_.rc_min_quantizer = 0; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + // Encode setting + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + + // TODO(yunqingwang): test single_tile_decoding = 0. + encoder->Control(AV1E_SET_SINGLE_TILE_DECODING, 1); + // Always use 64x64 max partition. + encoder->Control(AV1E_SET_SUPERBLOCK_SIZE, AOM_SUPERBLOCK_SIZE_64X64); + // Set tile_columns and tile_rows to MAX values, which guarantees the tile + // size of 64 x 64 pixels(i.e. 1 SB) for <= 4k resolution. + encoder->Control(AV1E_SET_TILE_COLUMNS, 6); + encoder->Control(AV1E_SET_TILE_ROWS, 6); + } else if (video->frame() == 1) { + frame_flags_ = + AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF; + } + } + + void DecompressedFrameHook(const aom_image_t &img, + aom_codec_pts_t pts) override { + // Skip 1 already decoded frame to be consistent with the decoder in this + // test. + if (pts == (aom_codec_pts_t)kSkip) return; + + // Calculate MD5 as the reference. + ::libaom_test::MD5 md5_res; + md5_res.Add(&img); + md5_.push_back(md5_res.Get()); + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + // Skip decoding 1 frame. 
+ if (pkt->data.frame.pts == (aom_codec_pts_t)kSkip) return; + + bool IsLastFrame = (pkt->data.frame.pts == (aom_codec_pts_t)(kLimit - 1)); + + // Decode the first (kLimit - 1) frames as whole frame, and decode the last + // frame in single tiles. + for (int r = 0; r < kImgHeight / kTIleSizeInPixels; ++r) { + for (int c = 0; c < kImgWidth / kTIleSizeInPixels; ++c) { + if (!IsLastFrame) { + decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1); + decoder_->Control(AV1_SET_DECODE_TILE_COL, -1); + } else { + decoder_->Control(AV1_SET_DECODE_TILE_ROW, r); + decoder_->Control(AV1_SET_DECODE_TILE_COL, c); + } + + const aom_codec_err_t res = decoder_->DecodeFrame( + reinterpret_cast<uint8_t *>(pkt->data.frame.buf), + pkt->data.frame.sz); + if (res != AOM_CODEC_OK) { + abort_ = true; + ASSERT_EQ(AOM_CODEC_OK, res); + } + const aom_image_t *img = decoder_->GetDxData().Next(); + + if (!IsLastFrame) { + if (img) { + ::libaom_test::MD5 md5_res; + md5_res.Add(img); + tile_md5_.push_back(md5_res.Get()); + } + break; + } + + const int kMaxMBPlane = 3; + for (int plane = 0; plane < kMaxMBPlane; ++plane) { + const int shift = (plane == 0) ? 
0 : 1; + int tile_height = kTIleSizeInPixels >> shift; + int tile_width = kTIleSizeInPixels >> shift; + + for (int tr = 0; tr < tile_height; ++tr) { + memcpy(tile_img_.planes[plane] + + tile_img_.stride[plane] * (r * tile_height + tr) + + c * tile_width, + img->planes[plane] + img->stride[plane] * tr, tile_width); + } + } + } + + if (!IsLastFrame) break; + } + + if (IsLastFrame) { + ::libaom_test::MD5 md5_res; + md5_res.Add(&tile_img_); + tile_md5_.push_back(md5_res.Get()); + } + } + + void TestRoundTrip() { + ::libaom_test::I420VideoSource video( + "hantro_collage_w352h288.yuv", kImgWidth, kImgHeight, 30, 1, 0, kLimit); + cfg_.rc_target_bitrate = 500; + cfg_.g_error_resilient = AOM_ERROR_RESILIENT_DEFAULT; + cfg_.large_scale_tile = 1; + cfg_.g_lag_in_frames = 0; + cfg_.g_threads = 1; + + // Tile encoding + init_flags_ = AOM_CODEC_USE_PSNR; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // Compare to check if two vectors are equal. + ASSERT_EQ(md5_, tile_md5_); + } + + ::libaom_test::TestMode encoding_mode_; + int set_cpu_used_; + ::libaom_test::Decoder *decoder_; + aom_image_t tile_img_; + std::vector<std::string> md5_; + std::vector<std::string> tile_md5_; +}; + +TEST_P(AV1ExtTileTest, DecoderResultTest) { TestRoundTrip(); } + +AV1_INSTANTIATE_TEST_SUITE( + // Now only test 2-pass mode. + AV1ExtTileTest, ::testing::Values(::libaom_test::kTwoPassGood), + ::testing::Range(1, 4)); + +class AV1ExtTileTestLarge : public AV1ExtTileTest {}; + +TEST_P(AV1ExtTileTestLarge, DecoderResultTest) { TestRoundTrip(); } + +AV1_INSTANTIATE_TEST_SUITE( + // Now only test 2-pass mode. 
+ AV1ExtTileTestLarge, ::testing::Values(::libaom_test::kTwoPassGood), + ::testing::Range(0, 1)); +} // namespace diff --git a/third_party/aom/test/av1_external_partition_test.cc b/third_party/aom/test/av1_external_partition_test.cc new file mode 100644 index 0000000000..88f6216fa5 --- /dev/null +++ b/third_party/aom/test/av1_external_partition_test.cc @@ -0,0 +1,702 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <fstream> +#include <new> +#include <sstream> +#include <string> + +#include "aom/aom_codec.h" +#include "aom/aom_external_partition.h" +#include "av1/common/blockd.h" +#include "av1/encoder/encodeframe_utils.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/y4m_video_source.h" +#include "test/util.h" + +#if CONFIG_AV1_ENCODER +#if !CONFIG_REALTIME_ONLY +namespace { + +constexpr int kFrameNum = 8; +constexpr int kVersion = 1; + +typedef struct TestData { + int version = kVersion; +} TestData; + +typedef struct ToyModel { + TestData *data; + aom_ext_part_config_t config; + aom_ext_part_funcs_t funcs; + int mi_row; + int mi_col; + int frame_width; + int frame_height; + BLOCK_SIZE block_size; +} ToyModel; + +// Note: +// if CONFIG_PARTITION_SEARCH_ORDER = 0, we test APIs designed for the baseline +// encoder's DFS partition search workflow. 
+// if CONFIG_PARTITION_SEARCH_ORDER = 1, we test APIs designed for the new +// ML model's partition search workflow. +#if CONFIG_PARTITION_SEARCH_ORDER +aom_ext_part_status_t ext_part_create_model( + void *priv, const aom_ext_part_config_t *part_config, + aom_ext_part_model_t *ext_part_model) { + TestData *received_data = reinterpret_cast<TestData *>(priv); + EXPECT_EQ(received_data->version, kVersion); + ToyModel *toy_model = new (std::nothrow) ToyModel; + if (toy_model == nullptr) { + EXPECT_NE(toy_model, nullptr); + return AOM_EXT_PART_ERROR; + } + toy_model->data = received_data; + *ext_part_model = toy_model; + EXPECT_EQ(part_config->superblock_size, BLOCK_64X64); + return AOM_EXT_PART_OK; +} + +aom_ext_part_status_t ext_part_send_features( + aom_ext_part_model_t ext_part_model, + const aom_partition_features_t *part_features) { + ToyModel *toy_model = static_cast<ToyModel *>(ext_part_model); + toy_model->mi_row = part_features->mi_row; + toy_model->mi_col = part_features->mi_col; + toy_model->frame_width = part_features->frame_width; + toy_model->frame_height = part_features->frame_height; + toy_model->block_size = static_cast<BLOCK_SIZE>(part_features->block_size); + return AOM_EXT_PART_OK; +} + +// The model provide the whole decision tree to the encoder. +aom_ext_part_status_t ext_part_get_partition_decision_whole_tree( + aom_ext_part_model_t ext_part_model, + aom_partition_decision_t *ext_part_decision) { + ToyModel *toy_model = static_cast<ToyModel *>(ext_part_model); + // A toy model that always asks the encoder to encode with + // 4x4 blocks (the smallest). + ext_part_decision->is_final_decision = 1; + // Note: super block size is fixed to BLOCK_64X64 for the + // input video. It is determined inside the encoder, see the + // check in "ext_part_create_model". 
+ const int is_last_sb_col = + toy_model->mi_col * 4 + 64 > toy_model->frame_width; + const int is_last_sb_row = + toy_model->mi_row * 4 + 64 > toy_model->frame_height; + if (is_last_sb_row && is_last_sb_col) { + // 64x64: 1 node + // 32x32: 4 nodes (only the first one will further split) + // 16x16: 4 nodes + // 8x8: 4 * 4 nodes + // 4x4: 4 * 4 * 4 nodes + const int num_blocks = 1 + 4 + 4 + 4 * 4 + 4 * 4 * 4; + const int num_4x4_blocks = 4 * 4 * 4; + ext_part_decision->num_nodes = num_blocks; + // 64x64 + ext_part_decision->partition_decision[0] = PARTITION_SPLIT; + // 32x32, only the first one will split, the other three are + // out of frame boundary. + ext_part_decision->partition_decision[1] = PARTITION_SPLIT; + ext_part_decision->partition_decision[2] = PARTITION_NONE; + ext_part_decision->partition_decision[3] = PARTITION_NONE; + ext_part_decision->partition_decision[4] = PARTITION_NONE; + // The rest blocks inside the top-left 32x32 block. + for (int i = 5; i < num_blocks - num_4x4_blocks; ++i) { + ext_part_decision->partition_decision[0] = PARTITION_SPLIT; + } + for (int i = num_blocks - num_4x4_blocks; i < num_blocks; ++i) { + ext_part_decision->partition_decision[i] = PARTITION_NONE; + } + } else if (is_last_sb_row) { + // 64x64: 1 node + // 32x32: 4 nodes (only the first two will further split) + // 16x16: 2 * 4 nodes + // 8x8: 2 * 4 * 4 nodes + // 4x4: 2 * 4 * 4 * 4 nodes + const int num_blocks = 1 + 4 + 2 * 4 + 2 * 4 * 4 + 2 * 4 * 4 * 4; + const int num_4x4_blocks = 2 * 4 * 4 * 4; + ext_part_decision->num_nodes = num_blocks; + // 64x64 + ext_part_decision->partition_decision[0] = PARTITION_SPLIT; + // 32x32, only the first two will split, the other two are out + // of frame boundary. + ext_part_decision->partition_decision[1] = PARTITION_SPLIT; + ext_part_decision->partition_decision[2] = PARTITION_SPLIT; + ext_part_decision->partition_decision[3] = PARTITION_NONE; + ext_part_decision->partition_decision[4] = PARTITION_NONE; + // The rest blocks. 
+ for (int i = 5; i < num_blocks - num_4x4_blocks; ++i) { + ext_part_decision->partition_decision[0] = PARTITION_SPLIT; + } + for (int i = num_blocks - num_4x4_blocks; i < num_blocks; ++i) { + ext_part_decision->partition_decision[i] = PARTITION_NONE; + } + } else if (is_last_sb_col) { + // 64x64: 1 node + // 32x32: 4 nodes (only the top-left and bottom-left will further split) + // 16x16: 2 * 4 nodes + // 8x8: 2 * 4 * 4 nodes + // 4x4: 2 * 4 * 4 * 4 nodes + const int num_blocks = 1 + 4 + 2 * 4 + 2 * 4 * 4 + 2 * 4 * 4 * 4; + const int num_4x4_blocks = 2 * 4 * 4 * 4; + ext_part_decision->num_nodes = num_blocks; + // 64x64 + ext_part_decision->partition_decision[0] = PARTITION_SPLIT; + // 32x32, only the top-left and bottom-left will split, the other two are + // out of frame boundary. + ext_part_decision->partition_decision[1] = PARTITION_SPLIT; + ext_part_decision->partition_decision[2] = PARTITION_NONE; + ext_part_decision->partition_decision[3] = PARTITION_SPLIT; + ext_part_decision->partition_decision[4] = PARTITION_NONE; + // The rest blocks. 
+ for (int i = 5; i < num_blocks - num_4x4_blocks; ++i) { + ext_part_decision->partition_decision[0] = PARTITION_SPLIT; + } + for (int i = num_blocks - num_4x4_blocks; i < num_blocks; ++i) { + ext_part_decision->partition_decision[i] = PARTITION_NONE; + } + } else { + // 64x64: 1 node + // 32x32: 4 nodes + // 16x16: 4 * 4 nodes + // 8x8: 4 * 4 * 4 nodes + // 4x4: 4 * 4 * 4 * 4 nodes + const int num_blocks = 1 + 4 + 4 * 4 + 4 * 4 * 4 + 4 * 4 * 4 * 4; + const int num_4x4_blocks = 4 * 4 * 4 * 4; + ext_part_decision->num_nodes = num_blocks; + for (int i = 0; i < num_blocks - num_4x4_blocks; ++i) { + ext_part_decision->partition_decision[i] = PARTITION_SPLIT; + } + for (int i = num_blocks - num_4x4_blocks; i < num_blocks; ++i) { + ext_part_decision->partition_decision[i] = PARTITION_NONE; + } + } + + return AOM_EXT_PART_OK; +} + +aom_ext_part_status_t ext_part_get_partition_decision_recursive( + aom_ext_part_model_t ext_part_model, + aom_partition_decision_t *ext_part_decision) { + ext_part_decision->current_decision = PARTITION_NONE; + ext_part_decision->is_final_decision = 1; + ToyModel *toy_model = static_cast<ToyModel *>(ext_part_model); + // Note: super block size is fixed to BLOCK_64X64 for the + // input video. It is determined inside the encoder, see the + // check in "ext_part_create_model". 
+ const int is_last_sb_col = + toy_model->mi_col * 4 + 64 > toy_model->frame_width; + const int is_last_sb_row = + toy_model->mi_row * 4 + 64 > toy_model->frame_height; + if (is_last_sb_row && is_last_sb_col) { + if (block_size_wide[toy_model->block_size] == 64) { + ext_part_decision->current_decision = PARTITION_SPLIT; + } else { + ext_part_decision->current_decision = PARTITION_NONE; + } + } else if (is_last_sb_row) { + if (block_size_wide[toy_model->block_size] == 64) { + ext_part_decision->current_decision = PARTITION_SPLIT; + } else { + ext_part_decision->current_decision = PARTITION_NONE; + } + } else if (is_last_sb_col) { + if (block_size_wide[toy_model->block_size] == 64) { + ext_part_decision->current_decision = PARTITION_SPLIT; + } else { + ext_part_decision->current_decision = PARTITION_NONE; + } + } else { + ext_part_decision->current_decision = PARTITION_NONE; + } + return AOM_EXT_PART_OK; +} + +aom_ext_part_status_t ext_part_send_partition_stats( + aom_ext_part_model_t ext_part_model, + const aom_partition_stats_t *ext_part_stats) { + (void)ext_part_model; + (void)ext_part_stats; + return AOM_EXT_PART_OK; +} + +aom_ext_part_status_t ext_part_delete_model( + aom_ext_part_model_t ext_part_model) { + ToyModel *toy_model = static_cast<ToyModel *>(ext_part_model); + EXPECT_EQ(toy_model->data->version, kVersion); + delete toy_model; + return AOM_EXT_PART_OK; +} + +class ExternalPartitionTestAPI + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>, + public ::libaom_test::EncoderTest { + protected: + ExternalPartitionTestAPI() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), psnr_(0.0), nframes_(0) {} + ~ExternalPartitionTestAPI() override {} + + void SetUp() override { + InitializeConfig(encoding_mode_); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = AOM_VBR; + cfg_.g_threads = 1; + cfg_.g_lag_in_frames = 4; + cfg_.rc_target_bitrate = 400; + 
init_flags_ = AOM_CODEC_USE_PSNR; + } + + bool DoDecode() const override { return false; } + + void BeginPassHook(unsigned int) override { + psnr_ = 0.0; + nframes_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + void SetExternalPartition(bool use_external_partition) { + use_external_partition_ = use_external_partition; + } + + void SetPartitionControlMode(int mode) { partition_control_mode_ = mode; } + + void SetDecisionMode(aom_ext_part_decision_mode_t mode) { + decision_mode_ = mode; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + if (decision_mode_ == AOM_EXT_PART_WHOLE_TREE) { + aom_ext_part_funcs_t ext_part_funcs; + ext_part_funcs.priv = reinterpret_cast<void *>(&test_data_); + ext_part_funcs.decision_mode = AOM_EXT_PART_WHOLE_TREE; + ext_part_funcs.create_model = ext_part_create_model; + ext_part_funcs.send_features = ext_part_send_features; + ext_part_funcs.get_partition_decision = + ext_part_get_partition_decision_whole_tree; + ext_part_funcs.send_partition_stats = ext_part_send_partition_stats; + ext_part_funcs.delete_model = ext_part_delete_model; + + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + if (use_external_partition_) { + encoder->Control(AV1E_SET_EXTERNAL_PARTITION, &ext_part_funcs); + } + if (partition_control_mode_ == -1) { + encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 128); + encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 4); + } else { + switch (partition_control_mode_) { + case 1: + encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 64); + encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 64); + break; + case 2: + encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 4); + encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 4); + break; + 
default: assert(0 && "Invalid partition control mode."); break; + } + } + } else if (decision_mode_ == AOM_EXT_PART_RECURSIVE) { + aom_ext_part_funcs_t ext_part_funcs; + ext_part_funcs.priv = reinterpret_cast<void *>(&test_data_); + ext_part_funcs.decision_mode = AOM_EXT_PART_RECURSIVE; + ext_part_funcs.create_model = ext_part_create_model; + ext_part_funcs.send_features = ext_part_send_features; + ext_part_funcs.get_partition_decision = + ext_part_get_partition_decision_recursive; + ext_part_funcs.send_partition_stats = ext_part_send_partition_stats; + ext_part_funcs.delete_model = ext_part_delete_model; + + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + if (use_external_partition_) { + encoder->Control(AV1E_SET_EXTERNAL_PARTITION, &ext_part_funcs); + } + if (partition_control_mode_ == -1) { + encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 128); + encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 4); + } else { + switch (partition_control_mode_) { + case 1: + encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 64); + encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 64); + break; + case 2: + encoder->Control(AV1E_SET_MAX_PARTITION_SIZE, 4); + encoder->Control(AV1E_SET_MIN_PARTITION_SIZE, 4); + break; + default: assert(0 && "Invalid partition control mode."); break; + } + } + } else { + assert(0 && "Invalid decision mode."); + } + } + } + + private: + libaom_test::TestMode encoding_mode_; + int cpu_used_; + double psnr_; + unsigned int nframes_; + bool use_external_partition_ = false; + TestData test_data_; + int partition_control_mode_ = -1; + aom_ext_part_decision_mode_t decision_mode_; +}; + +// Encode twice and expect the same psnr value. +// The first run is a normal encoding run with restricted partition types, +// i.e., we use control flags to force the encoder to encode with the +// 4x4 block size. 
+// The second run is to get partition decisions from a toy model that we +// built, which will asks the encoder to encode with the 4x4 blocks. +// We expect the encoding results are the same. +TEST_P(ExternalPartitionTestAPI, WholePartitionTree4x4Block) { + ::libaom_test::Y4mVideoSource video("paris_352_288_30.y4m", 0, kFrameNum); + SetExternalPartition(false); + SetPartitionControlMode(2); + SetDecisionMode(AOM_EXT_PART_WHOLE_TREE); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr = GetAveragePsnr(); + + SetExternalPartition(true); + SetPartitionControlMode(2); + SetDecisionMode(AOM_EXT_PART_WHOLE_TREE); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr2 = GetAveragePsnr(); + + EXPECT_DOUBLE_EQ(psnr, psnr2); +} + +TEST_P(ExternalPartitionTestAPI, RecursivePartition) { + ::libaom_test::Y4mVideoSource video("paris_352_288_30.y4m", 0, kFrameNum); + SetExternalPartition(false); + SetPartitionControlMode(1); + SetDecisionMode(AOM_EXT_PART_RECURSIVE); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr = GetAveragePsnr(); + + SetExternalPartition(true); + SetPartitionControlMode(1); + SetDecisionMode(AOM_EXT_PART_RECURSIVE); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr2 = GetAveragePsnr(); + + const double psnr_thresh = 0.02; + EXPECT_NEAR(psnr, psnr2, psnr_thresh); +} + +AV1_INSTANTIATE_TEST_SUITE(ExternalPartitionTestAPI, + ::testing::Values(::libaom_test::kTwoPassGood), + ::testing::Values(4)); // cpu_used + +#else // !CONFIG_PARTITION_SEARCH_ORDER +// Feature files written during encoding, as defined in partition_strategy.c. 
+std::string feature_file_names[] = { + "feature_before_partition_none", + "feature_before_partition_none_prune_rect", + "feature_after_partition_none_prune", + "feature_after_partition_none_terminate", + "feature_after_partition_split_terminate", + "feature_after_partition_split_prune_rect", + "feature_after_partition_rect", + "feature_after_partition_ab", +}; + +// Files written here in the test, where the feature data is received +// from the API. +std::string test_feature_file_names[] = { + "test_feature_before_partition_none", + "test_feature_before_partition_none_prune_rect", + "test_feature_after_partition_none_prune", + "test_feature_after_partition_none_terminate", + "test_feature_after_partition_split_terminate", + "test_feature_after_partition_split_prune_rect", + "test_feature_after_partition_rect", + "test_feature_after_partition_ab", +}; + +static void write_features_to_file(const float *features, + const int feature_size, const int id) { + if (!WRITE_FEATURE_TO_FILE) return; + char filename[256]; + snprintf(filename, sizeof(filename), "%s", + test_feature_file_names[id].c_str()); + FILE *pfile = fopen(filename, "a"); + ASSERT_NE(pfile, nullptr); + for (int i = 0; i < feature_size; ++i) { + fprintf(pfile, "%.6f", features[i]); + if (i < feature_size - 1) fprintf(pfile, ","); + } + fprintf(pfile, "\n"); + fclose(pfile); +} + +aom_ext_part_status_t ext_part_create_model( + void *priv, const aom_ext_part_config_t *part_config, + aom_ext_part_model_t *ext_part_model) { + TestData *received_data = reinterpret_cast<TestData *>(priv); + EXPECT_EQ(received_data->version, kVersion); + ToyModel *toy_model = new (std::nothrow) ToyModel; + if (toy_model == nullptr) { + EXPECT_NE(toy_model, nullptr); + return AOM_EXT_PART_ERROR; + } + toy_model->data = received_data; + *ext_part_model = toy_model; + EXPECT_EQ(part_config->superblock_size, BLOCK_64X64); + return AOM_EXT_PART_OK; +} + +aom_ext_part_status_t ext_part_create_model_test( + void *priv, const 
aom_ext_part_config_t *part_config, + aom_ext_part_model_t *ext_part_model) { + (void)priv; + (void)ext_part_model; + EXPECT_EQ(part_config->superblock_size, BLOCK_64X64); + // Return status indicates it's a encoder test. It lets the encoder + // set a flag and write partition features to text files. + return AOM_EXT_PART_TEST; +} + +aom_ext_part_status_t ext_part_send_features( + aom_ext_part_model_t ext_part_model, + const aom_partition_features_t *part_features) { + (void)ext_part_model; + (void)part_features; + return AOM_EXT_PART_OK; +} + +aom_ext_part_status_t ext_part_send_features_test( + aom_ext_part_model_t ext_part_model, + const aom_partition_features_t *part_features) { + (void)ext_part_model; + if (part_features->id == AOM_EXT_PART_FEATURE_BEFORE_NONE) { + write_features_to_file(part_features->before_part_none.f, + AOM_EXT_PART_SIZE_DIRECT_SPLIT, 0); + } else if (part_features->id == AOM_EXT_PART_FEATURE_BEFORE_NONE_PART2) { + write_features_to_file(part_features->before_part_none.f_part2, + AOM_EXT_PART_SIZE_PRUNE_PART, 1); + } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_NONE) { + write_features_to_file(part_features->after_part_none.f, + AOM_EXT_PART_SIZE_PRUNE_NONE, 2); + } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_NONE_PART2) { + write_features_to_file(part_features->after_part_none.f_terminate, + AOM_EXT_PART_SIZE_TERM_NONE, 3); + } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_SPLIT) { + write_features_to_file(part_features->after_part_split.f_terminate, + AOM_EXT_PART_SIZE_TERM_SPLIT, 4); + } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_SPLIT_PART2) { + write_features_to_file(part_features->after_part_split.f_prune_rect, + AOM_EXT_PART_SIZE_PRUNE_RECT, 5); + } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_RECT) { + write_features_to_file(part_features->after_part_rect.f, + AOM_EXT_PART_SIZE_PRUNE_AB, 6); + } else if (part_features->id == AOM_EXT_PART_FEATURE_AFTER_AB) { + 
write_features_to_file(part_features->after_part_ab.f, + AOM_EXT_PART_SIZE_PRUNE_4_WAY, 7); + } + return AOM_EXT_PART_TEST; +} + +aom_ext_part_status_t ext_part_get_partition_decision( + aom_ext_part_model_t ext_part_model, + aom_partition_decision_t *ext_part_decision) { + (void)ext_part_model; + (void)ext_part_decision; + // Return an invalid decision such that the encoder doesn't take any + // partition decision from the ml model. + return AOM_EXT_PART_ERROR; +} + +aom_ext_part_status_t ext_part_send_partition_stats( + aom_ext_part_model_t ext_part_model, + const aom_partition_stats_t *ext_part_stats) { + (void)ext_part_model; + (void)ext_part_stats; + return AOM_EXT_PART_OK; +} + +aom_ext_part_status_t ext_part_delete_model( + aom_ext_part_model_t ext_part_model) { + ToyModel *toy_model = static_cast<ToyModel *>(ext_part_model); + EXPECT_EQ(toy_model->data->version, kVersion); + delete toy_model; + return AOM_EXT_PART_OK; +} + +class ExternalPartitionTestDfsAPI + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>, + public ::libaom_test::EncoderTest { + protected: + ExternalPartitionTestDfsAPI() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), psnr_(0.0), nframes_(0) {} + ~ExternalPartitionTestDfsAPI() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = AOM_VBR; + cfg_.g_threads = 1; + cfg_.g_lag_in_frames = 4; + cfg_.rc_target_bitrate = 400; + init_flags_ = AOM_CODEC_USE_PSNR; + } + + bool DoDecode() const override { return false; } + + void BeginPassHook(unsigned int) override { + psnr_ = 0.0; + nframes_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + void SetExternalPartition(bool 
use_external_partition) { + use_external_partition_ = use_external_partition; + } + + void SetTestSendFeatures(int test_send_features) { + test_send_features_ = test_send_features; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + aom_ext_part_funcs_t ext_part_funcs; + ext_part_funcs.priv = reinterpret_cast<void *>(&test_data_); + if (use_external_partition_) { + ext_part_funcs.create_model = ext_part_create_model; + ext_part_funcs.send_features = ext_part_send_features; + } + if (test_send_features_ == 1) { + ext_part_funcs.create_model = ext_part_create_model; + ext_part_funcs.send_features = ext_part_send_features_test; + } else if (test_send_features_ == 0) { + ext_part_funcs.create_model = ext_part_create_model_test; + ext_part_funcs.send_features = ext_part_send_features; + } + ext_part_funcs.get_partition_decision = ext_part_get_partition_decision; + ext_part_funcs.send_partition_stats = ext_part_send_partition_stats; + ext_part_funcs.delete_model = ext_part_delete_model; + + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + if (use_external_partition_) { + encoder->Control(AV1E_SET_EXTERNAL_PARTITION, &ext_part_funcs); + } + } + } + + private: + libaom_test::TestMode encoding_mode_; + int cpu_used_; + double psnr_; + unsigned int nframes_; + bool use_external_partition_ = false; + int test_send_features_ = -1; + TestData test_data_; +}; + +// Encode twice and expect the same psnr value. +// The first run is the baseline without external partition. +// The second run is to get partition decisions from the toy model we defined. +// Here, we let the partition decision return invalid for all stages. +// In this case, the external partition doesn't alter the original encoder +// behavior. So we expect the same encoding results. 
+TEST_P(ExternalPartitionTestDfsAPI, EncodeMatch) { + ::libaom_test::Y4mVideoSource video("paris_352_288_30.y4m", 0, kFrameNum); + SetExternalPartition(false); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr = GetAveragePsnr(); + + SetExternalPartition(true); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr2 = GetAveragePsnr(); + + EXPECT_DOUBLE_EQ(psnr, psnr2); +} + +// Encode twice to compare generated feature files. +// The first run let the encoder write partition features to file. +// The second run calls send partition features function to send features to +// the external model, and we write them to file. +// The generated files should match each other. +TEST_P(ExternalPartitionTestDfsAPI, SendFeatures) { + ::libaom_test::Y4mVideoSource video("paris_352_288_30.y4m", 0, kFrameNum); + SetExternalPartition(true); + SetTestSendFeatures(0); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + SetExternalPartition(true); + SetTestSendFeatures(1); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + if (!WRITE_FEATURE_TO_FILE) return; + + // Compare feature files by reading them into strings. + for (int i = 0; i < 8; ++i) { + std::ifstream base_file(feature_file_names[i]); + ASSERT_TRUE(base_file.good()); + std::stringstream base_stream; + base_stream << base_file.rdbuf(); + std::string base_string = base_stream.str(); + + std::ifstream test_file(test_feature_file_names[i]); + ASSERT_TRUE(test_file.good()); + std::stringstream test_stream; + test_stream << test_file.rdbuf(); + std::string test_string = test_stream.str(); + + EXPECT_STREQ(base_string.c_str(), test_string.c_str()); + } + + // Remove files. 
+ std::string command("rm -f feature_* test_feature_*"); + system(command.c_str()); +} + +AV1_INSTANTIATE_TEST_SUITE(ExternalPartitionTestDfsAPI, + ::testing::Values(::libaom_test::kTwoPassGood), + ::testing::Values(4)); // cpu_used +#endif // CONFIG_PARTITION_SEARCH_ORDER + +} // namespace +#endif // !CONFIG_REALTIME_ONLY +#endif // CONFIG_AV1_ENCODER diff --git a/third_party/aom/test/av1_fwd_txfm1d_test.cc b/third_party/aom/test/av1_fwd_txfm1d_test.cc new file mode 100644 index 0000000000..6bae9f8364 --- /dev/null +++ b/third_party/aom/test/av1_fwd_txfm1d_test.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <memory> +#include <new> + +#include "av1/encoder/av1_fwd_txfm1d.h" +#include "test/av1_txfm_test.h" + +using libaom_test::ACMRandom; +using libaom_test::input_base; +using libaom_test::reference_hybrid_1d; +using libaom_test::TYPE_ADST; +using libaom_test::TYPE_DCT; +using libaom_test::TYPE_IDTX; +using libaom_test::TYPE_TXFM; + +namespace { +const int txfm_type_num = 3; +const TYPE_TXFM txfm_type_ls[txfm_type_num] = { TYPE_DCT, TYPE_ADST, + TYPE_IDTX }; + +const int txfm_size_num = 5; + +const int txfm_size_ls[] = { 4, 8, 16, 32, 64 }; + +const TxfmFunc fwd_txfm_func_ls[][txfm_type_num] = { + { av1_fdct4, av1_fadst4, av1_fidentity4_c }, + { av1_fdct8, av1_fadst8, av1_fidentity8_c }, + { av1_fdct16, av1_fadst16, av1_fidentity16_c }, + { av1_fdct32, nullptr, av1_fidentity32_c }, + { av1_fdct64, nullptr, nullptr }, +}; + +// the maximum stage number of fwd/inv 1d dct/adst txfm is 12 +const int8_t cos_bit = 13; +const int8_t range_bit[12] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 }; + +TEST(av1_fwd_txfm1d, round_shift) { + EXPECT_EQ(round_shift(7, 1), 4); + EXPECT_EQ(round_shift(-7, 1), -3); + + EXPECT_EQ(round_shift(7, 2), 2); + EXPECT_EQ(round_shift(-7, 2), -2); + + EXPECT_EQ(round_shift(8, 2), 2); + EXPECT_EQ(round_shift(-8, 2), -2); +} + +TEST(av1_fwd_txfm1d, av1_cospi_arr_data) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 64; j++) { + EXPECT_EQ(av1_cospi_arr_data[i][j], + (int32_t)round(cos(PI * j / 128) * (1 << (cos_bit_min + i)))); + } + } +} + +TEST(av1_fwd_txfm1d, accuracy) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int si = 0; si < txfm_size_num; ++si) { + int txfm_size = txfm_size_ls[si]; + std::unique_ptr<int32_t[]> input(new (std::nothrow) int32_t[txfm_size]); + std::unique_ptr<int32_t[]> output(new (std::nothrow) int32_t[txfm_size]); + std::unique_ptr<double[]> ref_input(new (std::nothrow) double[txfm_size]); + std::unique_ptr<double[]> ref_output(new (std::nothrow) double[txfm_size]); + 
ASSERT_NE(input, nullptr); + ASSERT_NE(output, nullptr); + ASSERT_NE(ref_input, nullptr); + ASSERT_NE(ref_output, nullptr); + + for (int ti = 0; ti < txfm_type_num; ++ti) { + TYPE_TXFM txfm_type = txfm_type_ls[ti]; + TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[si][ti]; + int max_error = 7; + + const int count_test_block = 5000; + if (fwd_txfm_func != nullptr) { + for (int i = 0; i < count_test_block; ++i) { + for (int ni = 0; ni < txfm_size; ++ni) { + input[ni] = rnd.Rand16() % input_base - rnd.Rand16() % input_base; + ref_input[ni] = static_cast<double>(input[ni]); + } + + fwd_txfm_func(input.get(), output.get(), cos_bit, range_bit); + reference_hybrid_1d(ref_input.get(), ref_output.get(), txfm_size, + txfm_type); + + for (int ni = 0; ni < txfm_size; ++ni) { + ASSERT_LE( + abs(output[ni] - static_cast<int32_t>(round(ref_output[ni]))), + max_error) + << "tx size = " << txfm_size << ", tx type = " << txfm_type; + } + } + } + } + } +} +} // namespace diff --git a/third_party/aom/test/av1_fwd_txfm2d_test.cc b/third_party/aom/test/av1_fwd_txfm2d_test.cc new file mode 100644 index 0000000000..2ed5d94db3 --- /dev/null +++ b/third_party/aom/test/av1_fwd_txfm2d_test.cc @@ -0,0 +1,692 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <tuple> +#include <vector> + +#include "config/av1_rtcd.h" + +#include "test/acm_random.h" +#include "test/util.h" +#include "test/av1_txfm_test.h" +#include "av1/common/av1_txfm.h" +#include "av1/encoder/hybrid_fwd_txfm.h" + +using libaom_test::ACMRandom; +using libaom_test::bd; +using libaom_test::compute_avg_abs_error; +using libaom_test::input_base; +using libaom_test::tx_type_name; +using libaom_test::TYPE_TXFM; + +using std::vector; + +namespace { +// tx_type_, tx_size_, max_error_, max_avg_error_ +typedef std::tuple<TX_TYPE, TX_SIZE, double, double> AV1FwdTxfm2dParam; + +class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> { + public: + void SetUp() override { + tx_type_ = GET_PARAM(0); + tx_size_ = GET_PARAM(1); + max_error_ = GET_PARAM(2); + max_avg_error_ = GET_PARAM(3); + count_ = 500; + TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg; + av1_get_fwd_txfm_cfg(tx_type_, tx_size_, &fwd_txfm_flip_cfg); + amplify_factor_ = libaom_test::get_amplification_factor(tx_type_, tx_size_); + tx_width_ = tx_size_wide[fwd_txfm_flip_cfg.tx_size]; + tx_height_ = tx_size_high[fwd_txfm_flip_cfg.tx_size]; + ud_flip_ = fwd_txfm_flip_cfg.ud_flip; + lr_flip_ = fwd_txfm_flip_cfg.lr_flip; + + fwd_txfm_ = libaom_test::fwd_txfm_func_ls[tx_size_]; + txfm2d_size_ = tx_width_ * tx_height_; + input_ = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(input_[0]) * txfm2d_size_)); + ASSERT_NE(input_, nullptr); + output_ = reinterpret_cast<int32_t *>( + aom_memalign(16, sizeof(output_[0]) * txfm2d_size_)); + ASSERT_NE(output_, nullptr); + ref_input_ = reinterpret_cast<double *>( + aom_memalign(16, sizeof(ref_input_[0]) * txfm2d_size_)); + ASSERT_NE(ref_input_, nullptr); + ref_output_ = reinterpret_cast<double *>( + aom_memalign(16, sizeof(ref_output_[0]) * txfm2d_size_)); + ASSERT_NE(ref_output_, nullptr); + } + + void RunFwdAccuracyCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + double 
avg_abs_error = 0; + for (int ci = 0; ci < count_; ci++) { + for (int ni = 0; ni < txfm2d_size_; ++ni) { + input_[ni] = rnd.Rand16() % input_base; + ref_input_[ni] = static_cast<double>(input_[ni]); + output_[ni] = 0; + ref_output_[ni] = 0; + } + + fwd_txfm_(input_, output_, tx_width_, tx_type_, bd); + + if (lr_flip_ && ud_flip_) { + libaom_test::fliplrud(ref_input_, tx_width_, tx_height_, tx_width_); + } else if (lr_flip_) { + libaom_test::fliplr(ref_input_, tx_width_, tx_height_, tx_width_); + } else if (ud_flip_) { + libaom_test::flipud(ref_input_, tx_width_, tx_height_, tx_width_); + } + + libaom_test::reference_hybrid_2d(ref_input_, ref_output_, tx_type_, + tx_size_); + + double actual_max_error = 0; + for (int ni = 0; ni < txfm2d_size_; ++ni) { + ref_output_[ni] = round(ref_output_[ni]); + const double this_error = + fabs(output_[ni] - ref_output_[ni]) / amplify_factor_; + actual_max_error = AOMMAX(actual_max_error, this_error); + } + EXPECT_GE(max_error_, actual_max_error) + << "tx_w: " << tx_width_ << " tx_h: " << tx_height_ + << ", tx_type = " << (int)tx_type_; + if (actual_max_error > max_error_) { // exit early. 
+ break; + } + + avg_abs_error += compute_avg_abs_error<int32_t, double>( + output_, ref_output_, txfm2d_size_); + } + + avg_abs_error /= amplify_factor_; + avg_abs_error /= count_; + EXPECT_GE(max_avg_error_, avg_abs_error) + << "tx_size = " << tx_size_ << ", tx_type = " << tx_type_; + } + + void TearDown() override { + aom_free(input_); + aom_free(output_); + aom_free(ref_input_); + aom_free(ref_output_); + } + + private: + double max_error_; + double max_avg_error_; + int count_; + double amplify_factor_; + TX_TYPE tx_type_; + TX_SIZE tx_size_; + int tx_width_; + int tx_height_; + int txfm2d_size_; + FwdTxfm2dFunc fwd_txfm_; + int16_t *input_; + int32_t *output_; + double *ref_input_; + double *ref_output_; + int ud_flip_; // flip upside down + int lr_flip_; // flip left to right +}; + +static double avg_error_ls[TX_SIZES_ALL] = { + 0.5, // 4x4 transform + 0.5, // 8x8 transform + 1.2, // 16x16 transform + 6.1, // 32x32 transform + 3.4, // 64x64 transform + 0.57, // 4x8 transform + 0.68, // 8x4 transform + 0.92, // 8x16 transform + 1.1, // 16x8 transform + 4.1, // 16x32 transform + 6, // 32x16 transform + 3.5, // 32x64 transform + 5.7, // 64x32 transform + 0.6, // 4x16 transform + 0.9, // 16x4 transform + 1.2, // 8x32 transform + 1.7, // 32x8 transform + 2.0, // 16x64 transform + 4.7, // 64x16 transform +}; + +static double max_error_ls[TX_SIZES_ALL] = { + 3, // 4x4 transform + 5, // 8x8 transform + 11, // 16x16 transform + 70, // 32x32 transform + 64, // 64x64 transform + 3.9, // 4x8 transform + 4.3, // 8x4 transform + 12, // 8x16 transform + 12, // 16x8 transform + 32, // 16x32 transform + 46, // 32x16 transform + 136, // 32x64 transform + 136, // 64x32 transform + 5, // 4x16 transform + 6, // 16x4 transform + 21, // 8x32 transform + 13, // 32x8 transform + 30, // 16x64 transform + 36, // 64x16 transform +}; + +vector<AV1FwdTxfm2dParam> GetTxfm2dParamList() { + vector<AV1FwdTxfm2dParam> param_list; + for (int s = 0; s < TX_SIZES; ++s) { + const double max_error 
= max_error_ls[s]; + const double avg_error = avg_error_ls[s]; + for (int t = 0; t < TX_TYPES; ++t) { + const TX_TYPE tx_type = static_cast<TX_TYPE>(t); + const TX_SIZE tx_size = static_cast<TX_SIZE>(s); + if (libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) { + param_list.push_back( + AV1FwdTxfm2dParam(tx_type, tx_size, max_error, avg_error)); + } + } + } + return param_list; +} + +INSTANTIATE_TEST_SUITE_P(C, AV1FwdTxfm2d, + ::testing::ValuesIn(GetTxfm2dParamList())); + +TEST_P(AV1FwdTxfm2d, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); } + +TEST(AV1FwdTxfm2d, CfgTest) { + for (int bd_idx = 0; bd_idx < BD_NUM; ++bd_idx) { + int bd = libaom_test::bd_arr[bd_idx]; + int8_t low_range = libaom_test::low_range_arr[bd_idx]; + int8_t high_range = libaom_test::high_range_arr[bd_idx]; + for (int tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) { + for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) { + if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(tx_size), + static_cast<TX_TYPE>(tx_type)) == + false) { + continue; + } + TXFM_2D_FLIP_CFG cfg; + av1_get_fwd_txfm_cfg(static_cast<TX_TYPE>(tx_type), + static_cast<TX_SIZE>(tx_size), &cfg); + int8_t stage_range_col[MAX_TXFM_STAGE_NUM]; + int8_t stage_range_row[MAX_TXFM_STAGE_NUM]; + av1_gen_fwd_stage_range(stage_range_col, stage_range_row, &cfg, bd); + libaom_test::txfm_stage_range_check(stage_range_col, cfg.stage_num_col, + cfg.cos_bit_col, low_range, + high_range); + libaom_test::txfm_stage_range_check(stage_range_row, cfg.stage_num_row, + cfg.cos_bit_row, low_range, + high_range); + } + } + } +} + +typedef void (*lowbd_fwd_txfm_func)(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TxfmParam *txfm_param); + +void AV1FwdTxfm2dMatchTest(TX_SIZE tx_size, lowbd_fwd_txfm_func target_func) { + const int bd = 8; + TxfmParam param; + memset(¶m, 0, sizeof(param)); + const int rows = tx_size_high[tx_size]; + const int cols = tx_size_wide[tx_size]; + // printf("%d x %d\n", cols, rows); + for (int tx_type = 0; 
tx_type < TX_TYPES; ++tx_type) { + if (libaom_test::IsTxSizeTypeValid( + tx_size, static_cast<TX_TYPE>(tx_type)) == false) { + continue; + } + + FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size]; + if (ref_func != nullptr) { + DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 }; + DECLARE_ALIGNED(32, int32_t, output[64 * 64]); + DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]); + int input_stride = 64; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int cnt = 0; cnt < 500; ++cnt) { + if (cnt == 0) { + for (int c = 0; c < cols; ++c) { + for (int r = 0; r < rows; ++r) { + input[r * input_stride + c] = (1 << bd) - 1; + } + } + } else { + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + input[r * input_stride + c] = rnd.Rand16() % (1 << bd); + } + } + } + param.tx_type = (TX_TYPE)tx_type; + param.tx_size = (TX_SIZE)tx_size; + param.tx_set_type = EXT_TX_SET_ALL16; + param.bd = bd; + ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd); + target_func(input, output, input_stride, ¶m); + const int check_cols = AOMMIN(32, cols); + const int check_rows = AOMMIN(32, rows * cols / check_cols); + for (int r = 0; r < check_rows; ++r) { + for (int c = 0; c < check_cols; ++c) { + ASSERT_EQ(ref_output[r * check_cols + c], + output[r * check_cols + c]) + << "[" << r << "," << c << "] cnt:" << cnt + << " tx_size: " << cols << "x" << rows + << " tx_type: " << tx_type_name[tx_type]; + } + } + } + } + } +} + +void AV1FwdTxfm2dSpeedTest(TX_SIZE tx_size, lowbd_fwd_txfm_func target_func) { + TxfmParam param; + memset(¶m, 0, sizeof(param)); + const int rows = tx_size_high[tx_size]; + const int cols = tx_size_wide[tx_size]; + const int num_loops = 1000000 / (rows * cols); + + const int bd = 8; + for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) { + if (libaom_test::IsTxSizeTypeValid( + tx_size, static_cast<TX_TYPE>(tx_type)) == false) { + continue; + } + + FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size]; + if 
(ref_func != nullptr) { + DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 }; + DECLARE_ALIGNED(32, int32_t, output[64 * 64]); + DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]); + int input_stride = 64; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + input[r * input_stride + c] = rnd.Rand16() % (1 << bd); + } + } + + param.tx_type = (TX_TYPE)tx_type; + param.tx_size = (TX_SIZE)tx_size; + param.tx_set_type = EXT_TX_SET_ALL16; + param.bd = bd; + + aom_usec_timer ref_timer, test_timer; + + aom_usec_timer_start(&ref_timer); + for (int i = 0; i < num_loops; ++i) { + ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd); + } + aom_usec_timer_mark(&ref_timer); + const int elapsed_time_c = + static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int i = 0; i < num_loops; ++i) { + target_func(input, output, input_stride, ¶m); + } + aom_usec_timer_mark(&test_timer); + const int elapsed_time_simd = + static_cast<int>(aom_usec_timer_elapsed(&test_timer)); + + printf( + "txfm_size[%2dx%-2d] \t txfm_type[%d] \t c_time=%d \t" + "simd_time=%d \t gain=%d \n", + rows, cols, tx_type, elapsed_time_c, elapsed_time_simd, + (elapsed_time_c / elapsed_time_simd)); + } + } +} + +typedef std::tuple<TX_SIZE, lowbd_fwd_txfm_func> LbdFwdTxfm2dParam; + +class AV1FwdTxfm2dTest : public ::testing::TestWithParam<LbdFwdTxfm2dParam> {}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1FwdTxfm2dTest); + +TEST_P(AV1FwdTxfm2dTest, match) { + AV1FwdTxfm2dMatchTest(GET_PARAM(0), GET_PARAM(1)); +} +TEST_P(AV1FwdTxfm2dTest, DISABLED_Speed) { + AV1FwdTxfm2dSpeedTest(GET_PARAM(0), GET_PARAM(1)); +} +TEST(AV1FwdTxfm2dTest, DCTScaleTest) { + BitDepthInfo bd_info; + bd_info.bit_depth = 8; + bd_info.use_highbitdepth_buf = 0; + DECLARE_ALIGNED(32, int16_t, src_diff[1024]); + DECLARE_ALIGNED(32, tran_low_t, coeff[1024]); + + const TX_SIZE tx_size_list[4] = { TX_4X4, TX_8X8, 
TX_16X16, TX_32X32 }; + const int stride_list[4] = { 4, 8, 16, 32 }; + const int ref_scale_list[4] = { 64, 64, 64, 16 }; + + for (int i = 0; i < 4; i++) { + TX_SIZE tx_size = tx_size_list[i]; + int stride = stride_list[i]; + int array_size = stride * stride; + + for (int j = 0; j < array_size; j++) { + src_diff[j] = 8; + coeff[j] = 0; + } + + av1_quick_txfm(/*use_hadamard=*/0, tx_size, bd_info, src_diff, stride, + coeff); + + double input_sse = 0; + double output_sse = 0; + for (int j = 0; j < array_size; j++) { + input_sse += pow(src_diff[j], 2); + output_sse += pow(coeff[j], 2); + } + + double scale = output_sse / input_sse; + + EXPECT_NEAR(scale, ref_scale_list[i], 5); + } +} +TEST(AV1FwdTxfm2dTest, HadamardScaleTest) { + BitDepthInfo bd_info; + bd_info.bit_depth = 8; + bd_info.use_highbitdepth_buf = 0; + DECLARE_ALIGNED(32, int16_t, src_diff[1024]); + DECLARE_ALIGNED(32, tran_low_t, coeff[1024]); + + const TX_SIZE tx_size_list[4] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32 }; + const int stride_list[4] = { 4, 8, 16, 32 }; + const int ref_scale_list[4] = { 1, 64, 64, 16 }; + + for (int i = 0; i < 4; i++) { + TX_SIZE tx_size = tx_size_list[i]; + int stride = stride_list[i]; + int array_size = stride * stride; + + for (int j = 0; j < array_size; j++) { + src_diff[j] = 8; + coeff[j] = 0; + } + + av1_quick_txfm(/*use_hadamard=*/1, tx_size, bd_info, src_diff, stride, + coeff); + + double input_sse = 0; + double output_sse = 0; + for (int j = 0; j < array_size; j++) { + input_sse += pow(src_diff[j], 2); + output_sse += pow(coeff[j], 2); + } + + double scale = output_sse / input_sse; + + EXPECT_NEAR(scale, ref_scale_list[i], 5); + } +} +using ::testing::Combine; +using ::testing::Values; +using ::testing::ValuesIn; + +#if HAVE_SSE2 +static TX_SIZE fwd_txfm_for_sse2[] = { + TX_4X4, + TX_8X8, + TX_16X16, + TX_32X32, + // TX_64X64, + TX_4X8, + TX_8X4, + TX_8X16, + TX_16X8, + TX_16X32, + TX_32X16, + // TX_32X64, + // TX_64X32, + TX_4X16, + TX_16X4, + TX_8X32, + TX_32X8, + 
TX_16X64, + TX_64X16, +}; + +INSTANTIATE_TEST_SUITE_P(SSE2, AV1FwdTxfm2dTest, + Combine(ValuesIn(fwd_txfm_for_sse2), + Values(av1_lowbd_fwd_txfm_sse2))); +#endif // HAVE_SSE2 + +#if HAVE_SSE4_1 +static TX_SIZE fwd_txfm_for_sse41[] = { + TX_4X4, + TX_64X64, + TX_32X64, + TX_64X32, +}; + +INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1FwdTxfm2dTest, + Combine(ValuesIn(fwd_txfm_for_sse41), + Values(av1_lowbd_fwd_txfm_sse4_1))); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 +static TX_SIZE fwd_txfm_for_avx2[] = { + TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_64X64, TX_4X8, TX_8X4, + TX_8X16, TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32, TX_4X16, + TX_16X4, TX_8X32, TX_32X8, TX_16X64, TX_64X16, +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, AV1FwdTxfm2dTest, + Combine(ValuesIn(fwd_txfm_for_avx2), + Values(av1_lowbd_fwd_txfm_avx2))); +#endif // HAVE_AVX2 + +#if HAVE_NEON + +static TX_SIZE fwd_txfm_for_neon[] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32, + TX_64X64, TX_4X8, TX_8X4, TX_8X16, + TX_16X8, TX_16X32, TX_32X16, TX_32X64, + TX_64X32, TX_4X16, TX_16X4, TX_8X32, + TX_32X8, TX_16X64, TX_64X16 }; + +INSTANTIATE_TEST_SUITE_P(NEON, AV1FwdTxfm2dTest, + Combine(ValuesIn(fwd_txfm_for_neon), + Values(av1_lowbd_fwd_txfm_neon))); + +#endif // HAVE_NEON + +typedef void (*Highbd_fwd_txfm_func)(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TxfmParam *txfm_param); + +void AV1HighbdFwdTxfm2dMatchTest(TX_SIZE tx_size, + Highbd_fwd_txfm_func target_func) { + const int bd_ar[2] = { 10, 12 }; + TxfmParam param; + memset(¶m, 0, sizeof(param)); + const int rows = tx_size_high[tx_size]; + const int cols = tx_size_wide[tx_size]; + for (int i = 0; i < 2; ++i) { + const int bd = bd_ar[i]; + for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) { + if (libaom_test::IsTxSizeTypeValid( + tx_size, static_cast<TX_TYPE>(tx_type)) == false) { + continue; + } + + FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size]; + if (ref_func != nullptr) { + DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 }; + 
DECLARE_ALIGNED(32, int32_t, output[64 * 64]); + DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]); + int input_stride = 64; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int cnt = 0; cnt < 500; ++cnt) { + if (cnt == 0) { + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + input[r * input_stride + c] = (1 << bd) - 1; + } + } + } else { + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + input[r * input_stride + c] = rnd.Rand16() % (1 << bd); + } + } + } + param.tx_type = (TX_TYPE)tx_type; + param.tx_size = (TX_SIZE)tx_size; + param.tx_set_type = EXT_TX_SET_ALL16; + param.bd = bd; + + ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd); + target_func(input, output, input_stride, ¶m); + const int check_cols = AOMMIN(32, cols); + const int check_rows = AOMMIN(32, rows * cols / check_cols); + for (int r = 0; r < check_rows; ++r) { + for (int c = 0; c < check_cols; ++c) { + ASSERT_EQ(ref_output[c * check_rows + r], + output[c * check_rows + r]) + << "[" << r << "," << c << "] cnt:" << cnt + << " tx_size: " << cols << "x" << rows + << " tx_type: " << tx_type; + } + } + } + } + } + } +} + +void AV1HighbdFwdTxfm2dSpeedTest(TX_SIZE tx_size, + Highbd_fwd_txfm_func target_func) { + const int bd_ar[2] = { 10, 12 }; + TxfmParam param; + memset(¶m, 0, sizeof(param)); + const int rows = tx_size_high[tx_size]; + const int cols = tx_size_wide[tx_size]; + const int num_loops = 1000000 / (rows * cols); + + for (int i = 0; i < 2; ++i) { + const int bd = bd_ar[i]; + for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) { + if (libaom_test::IsTxSizeTypeValid( + tx_size, static_cast<TX_TYPE>(tx_type)) == false) { + continue; + } + + FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size]; + if (ref_func != nullptr) { + DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 }; + DECLARE_ALIGNED(32, int32_t, output[64 * 64]); + DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]); + int input_stride = 64; + ACMRandom 
rnd(ACMRandom::DeterministicSeed()); + + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + input[r * input_stride + c] = rnd.Rand16() % (1 << bd); + } + } + + param.tx_type = (TX_TYPE)tx_type; + param.tx_size = (TX_SIZE)tx_size; + param.tx_set_type = EXT_TX_SET_ALL16; + param.bd = bd; + + aom_usec_timer ref_timer, test_timer; + + aom_usec_timer_start(&ref_timer); + for (int j = 0; j < num_loops; ++j) { + ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd); + } + aom_usec_timer_mark(&ref_timer); + const int elapsed_time_c = + static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int j = 0; j < num_loops; ++j) { + target_func(input, output, input_stride, ¶m); + } + aom_usec_timer_mark(&test_timer); + const int elapsed_time_simd = + static_cast<int>(aom_usec_timer_elapsed(&test_timer)); + + printf( + "txfm_size[%2dx%-2d] \t txfm_type[%d] \t c_time=%d \t" + "simd_time=%d \t gain=%d \n", + cols, rows, tx_type, elapsed_time_c, elapsed_time_simd, + (elapsed_time_c / elapsed_time_simd)); + } + } + } +} + +typedef std::tuple<TX_SIZE, Highbd_fwd_txfm_func> HighbdFwdTxfm2dParam; + +class AV1HighbdFwdTxfm2dTest + : public ::testing::TestWithParam<HighbdFwdTxfm2dParam> {}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdFwdTxfm2dTest); + +TEST_P(AV1HighbdFwdTxfm2dTest, match) { + AV1HighbdFwdTxfm2dMatchTest(GET_PARAM(0), GET_PARAM(1)); +} + +TEST_P(AV1HighbdFwdTxfm2dTest, DISABLED_Speed) { + AV1HighbdFwdTxfm2dSpeedTest(GET_PARAM(0), GET_PARAM(1)); +} + +using ::testing::Combine; +using ::testing::Values; +using ::testing::ValuesIn; + +#if HAVE_SSE4_1 +static TX_SIZE Highbd_fwd_txfm_for_sse4_1[] = { + TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_64X64, TX_4X8, TX_8X4, + TX_8X16, TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32, +#if !CONFIG_REALTIME_ONLY + TX_4X16, TX_16X4, TX_8X32, TX_32X8, TX_16X64, TX_64X16, +#endif +}; + +INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdFwdTxfm2dTest, + 
Combine(ValuesIn(Highbd_fwd_txfm_for_sse4_1), + Values(av1_highbd_fwd_txfm))); +#endif // HAVE_SSE4_1 +#if HAVE_AVX2 +static TX_SIZE Highbd_fwd_txfm_for_avx2[] = { TX_8X8, TX_16X16, TX_32X32, + TX_64X64, TX_8X16, TX_16X8 }; + +INSTANTIATE_TEST_SUITE_P(AVX2, AV1HighbdFwdTxfm2dTest, + Combine(ValuesIn(Highbd_fwd_txfm_for_avx2), + Values(av1_highbd_fwd_txfm))); +#endif // HAVE_AVX2 + +#if HAVE_NEON +static TX_SIZE Highbd_fwd_txfm_for_neon[] = { + TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_64X64, TX_4X8, TX_8X4, + TX_8X16, TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32, TX_4X16, + TX_16X4, TX_8X32, TX_32X8, TX_16X64, TX_64X16 +}; + +INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdFwdTxfm2dTest, + Combine(ValuesIn(Highbd_fwd_txfm_for_neon), + Values(av1_highbd_fwd_txfm))); +#endif // HAVE_NEON + +} // namespace diff --git a/third_party/aom/test/av1_highbd_iht_test.cc b/third_party/aom/test/av1_highbd_iht_test.cc new file mode 100644 index 0000000000..2c57362a82 --- /dev/null +++ b/third_party/aom/test/av1_highbd_iht_test.cc @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/av1_rtcd.h" + +#include "test/acm_random.h" +#include "test/av1_txfm_test.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/enums.h" +#include "av1/common/scan.h" +#include "aom_dsp/aom_dsp_common.h" +#include "aom_ports/mem.h" + +namespace { + +using libaom_test::ACMRandom; +using std::tuple; + +typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride, + TX_TYPE tx_type, int bd); + +typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride, + TX_TYPE tx_type, int bd); +static const char *tx_type_name[] = { + "DCT_DCT", + "ADST_DCT", + "DCT_ADST", + "ADST_ADST", + "FLIPADST_DCT", + "DCT_FLIPADST", + "FLIPADST_FLIPADST", + "ADST_FLIPADST", + "FLIPADST_ADST", + "IDTX", + "V_DCT", + "H_DCT", + "V_ADST", + "H_ADST", + "V_FLIPADST", + "H_FLIPADST", +}; +// Test parameter argument list: +// <transform reference function, +// optimized inverse transform function, +// inverse transform reference function, +// num_coeffs, +// tx_type, +// bit_depth> +typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, TX_TYPE, int> IHbdHtParam; + +class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> { + public: + ~AV1HighbdInvHTNxN() override = default; + + void SetUp() override { + txfm_ref_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + inv_txfm_ref_ = GET_PARAM(2); + num_coeffs_ = GET_PARAM(3); + tx_type_ = GET_PARAM(4); + bit_depth_ = GET_PARAM(5); + + input_ = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(input_[0]) * num_coeffs_)); + ASSERT_NE(input_, nullptr); + + // Note: + // Inverse transform input buffer is 32-byte aligned + // Refer to <root>/av1/encoder/context_tree.c, function, + // void alloc_mode_context(). 
+ coeffs_ = reinterpret_cast<int32_t *>( + aom_memalign(32, sizeof(coeffs_[0]) * num_coeffs_)); + ASSERT_NE(coeffs_, nullptr); + output_ = reinterpret_cast<uint16_t *>( + aom_memalign(32, sizeof(output_[0]) * num_coeffs_)); + ASSERT_NE(output_, nullptr); + output_ref_ = reinterpret_cast<uint16_t *>( + aom_memalign(32, sizeof(output_ref_[0]) * num_coeffs_)); + ASSERT_NE(output_ref_, nullptr); + } + + void TearDown() override { + aom_free(input_); + aom_free(coeffs_); + aom_free(output_); + aom_free(output_ref_); + } + + protected: + void RunBitexactCheck(); + + private: + int GetStride() const { + if (16 == num_coeffs_) { + return 4; + } else if (64 == num_coeffs_) { + return 8; + } else if (256 == num_coeffs_) { + return 16; + } else if (1024 == num_coeffs_) { + return 32; + } else if (4096 == num_coeffs_) { + return 64; + } else { + return 0; + } + } + + HbdHtFunc txfm_ref_; + IHbdHtFunc inv_txfm_; + IHbdHtFunc inv_txfm_ref_; + int num_coeffs_; + TX_TYPE tx_type_; + int bit_depth_; + + int16_t *input_; + int32_t *coeffs_; + uint16_t *output_; + uint16_t *output_ref_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdInvHTNxN); + +void AV1HighbdInvHTNxN::RunBitexactCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int stride = GetStride(); + const int num_tests = 20000; + const uint16_t mask = (1 << bit_depth_) - 1; + + for (int i = 0; i < num_tests; ++i) { + for (int j = 0; j < num_coeffs_; ++j) { + input_[j] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask); + output_ref_[j] = rnd.Rand16() & mask; + output_[j] = output_ref_[j]; + } + + txfm_ref_(input_, coeffs_, stride, tx_type_, bit_depth_); + inv_txfm_ref_(coeffs_, output_ref_, stride, tx_type_, bit_depth_); + API_REGISTER_STATE_CHECK( + inv_txfm_(coeffs_, output_, stride, tx_type_, bit_depth_)); + + for (int j = 0; j < num_coeffs_; ++j) { + EXPECT_EQ(output_ref_[j], output_[j]) + << "Not bit-exact result at index: " << j << " At test block: " << i; + } + } +} + 
+TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) { RunBitexactCheck(); } + +using std::make_tuple; + +#if HAVE_SSE4_1 +#define PARAM_LIST_4X4 \ + &av1_fwd_txfm2d_4x4_c, &av1_inv_txfm2d_add_4x4_sse4_1, \ + &av1_inv_txfm2d_add_4x4_c, 16 + +const IHbdHtParam kArrayIhtParam[] = { + // 4x4 + make_tuple(PARAM_LIST_4X4, DCT_DCT, 10), + make_tuple(PARAM_LIST_4X4, DCT_DCT, 12), + make_tuple(PARAM_LIST_4X4, ADST_DCT, 10), + make_tuple(PARAM_LIST_4X4, ADST_DCT, 12), + make_tuple(PARAM_LIST_4X4, DCT_ADST, 10), + make_tuple(PARAM_LIST_4X4, DCT_ADST, 12), + make_tuple(PARAM_LIST_4X4, ADST_ADST, 10), + make_tuple(PARAM_LIST_4X4, ADST_ADST, 12), + make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 10), + make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 12), + make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 10), + make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 12), + make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 10), + make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 12), + make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 10), + make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 12), + make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 10), + make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 12), +}; + +INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdInvHTNxN, + ::testing::ValuesIn(kArrayIhtParam)); +#endif // HAVE_SSE4_1 + +typedef void (*HighbdInvTxfm2dFunc)(const int32_t *input, uint8_t *output, + int stride, const TxfmParam *txfm_param); + +typedef std::tuple<const HighbdInvTxfm2dFunc> AV1HighbdInvTxfm2dParam; +class AV1HighbdInvTxfm2d + : public ::testing::TestWithParam<AV1HighbdInvTxfm2dParam> { + public: + void SetUp() override { target_func_ = GET_PARAM(0); } + void RunAV1InvTxfm2dTest(TX_TYPE tx_type, TX_SIZE tx_size, int run_times, + int bit_depth, int gt_int16 = 0); + + private: + HighbdInvTxfm2dFunc target_func_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdInvTxfm2d); + +void AV1HighbdInvTxfm2d::RunAV1InvTxfm2dTest(TX_TYPE tx_type_, TX_SIZE tx_size_, + int run_times, int bit_depth_, + int gt_int16) { +#if CONFIG_REALTIME_ONLY 
+ if (tx_size_ >= TX_4X16) { + return; + } +#endif + FwdTxfm2dFunc fwd_func_ = libaom_test::fwd_txfm_func_ls[tx_size_]; + TxfmParam txfm_param; + const int BLK_WIDTH = 64; + const int BLK_SIZE = BLK_WIDTH * BLK_WIDTH; + DECLARE_ALIGNED(16, int16_t, input[BLK_SIZE]) = { 0 }; + DECLARE_ALIGNED(32, int32_t, inv_input[BLK_SIZE]) = { 0 }; + DECLARE_ALIGNED(32, uint16_t, output[BLK_SIZE]) = { 0 }; + DECLARE_ALIGNED(32, uint16_t, ref_output[BLK_SIZE]) = { 0 }; + int stride = BLK_WIDTH; + int rows = tx_size_high[tx_size_]; + int cols = tx_size_wide[tx_size_]; + const int rows_nonezero = AOMMIN(32, rows); + const int cols_nonezero = AOMMIN(32, cols); + const uint16_t mask = (1 << bit_depth_) - 1; + run_times /= (rows * cols); + run_times = AOMMAX(1, run_times); + const SCAN_ORDER *scan_order = get_default_scan(tx_size_, tx_type_); + const int16_t *scan = scan_order->scan; + const int16_t eobmax = rows_nonezero * cols_nonezero; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int randTimes = run_times == 1 ? 
(eobmax) : 1; + + txfm_param.tx_type = tx_type_; + txfm_param.tx_size = tx_size_; + txfm_param.lossless = 0; + txfm_param.bd = bit_depth_; + txfm_param.is_hbd = 1; + txfm_param.tx_set_type = EXT_TX_SET_ALL16; + + for (int cnt = 0; cnt < randTimes; ++cnt) { + for (int r = 0; r < BLK_WIDTH; ++r) { + for (int c = 0; c < BLK_WIDTH; ++c) { + input[r * cols + c] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask); + output[r * stride + c] = rnd.Rand16() & mask; + + ref_output[r * stride + c] = output[r * stride + c]; + } + } + fwd_func_(input, inv_input, stride, tx_type_, bit_depth_); + + // produce eob input by setting high freq coeffs to zero + const int eob = AOMMIN(cnt + 1, eobmax); + for (int i = eob; i < eobmax; i++) { + inv_input[scan[i]] = 0; + } + txfm_param.eob = eob; + if (gt_int16) { + const uint16_t inv_input_mask = + static_cast<uint16_t>((1 << (bit_depth_ + 7)) - 1); + for (int i = 0; i < eob; i++) { + inv_input[scan[i]] = (rnd.Rand31() & inv_input_mask); + } + } + + aom_usec_timer ref_timer, test_timer; + aom_usec_timer_start(&ref_timer); + for (int i = 0; i < run_times; ++i) { + av1_highbd_inv_txfm_add_c(inv_input, CONVERT_TO_BYTEPTR(ref_output), + stride, &txfm_param); + } + aom_usec_timer_mark(&ref_timer); + const int elapsed_time_c = + static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int i = 0; i < run_times; ++i) { + target_func_(inv_input, CONVERT_TO_BYTEPTR(output), stride, &txfm_param); + } + aom_usec_timer_mark(&test_timer); + const int elapsed_time_simd = + static_cast<int>(aom_usec_timer_elapsed(&test_timer)); + if (run_times > 10) { + printf( + "txfm_size[%d] \t txfm_type[%d] \t c_time=%d \t simd_time=%d \t " + "gain=%d \n", + tx_size_, tx_type_, elapsed_time_c, elapsed_time_simd, + (elapsed_time_c / elapsed_time_simd)); + } else { + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + ASSERT_EQ(ref_output[r * stride + c], output[r * stride + c]) + << "[" << r << "," << c << "] " 
<< cnt << " tx_size: " << cols + << "x" << rows << " bit_depth_: " << bit_depth_ + << " tx_type: " << tx_type_name[tx_type_] << " eob " << eob; + } + } + } + } +} + +TEST_P(AV1HighbdInvTxfm2d, match) { + int bitdepth_ar[3] = { 8, 10, 12 }; + for (int k = 0; k < 3; ++k) { + int bd = bitdepth_ar[k]; + for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) { + for (int i = 0; i < (int)TX_TYPES; ++i) { + if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(j), + static_cast<TX_TYPE>(i))) { + RunAV1InvTxfm2dTest(static_cast<TX_TYPE>(i), static_cast<TX_SIZE>(j), + 1, bd); + } + } + } + } +} + +TEST_P(AV1HighbdInvTxfm2d, gt_int16) { + int bitdepth_ar[3] = { 8, 10, 12 }; + static const TX_TYPE types[] = { + DCT_DCT, ADST_DCT, FLIPADST_DCT, IDTX, V_DCT, H_DCT, H_ADST, H_FLIPADST + }; + for (int k = 0; k < 3; ++k) { + int bd = bitdepth_ar[k]; + for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) { + const TX_SIZE sz = static_cast<TX_SIZE>(j); + for (uint8_t i = 0; i < sizeof(types) / sizeof(TX_TYPE); ++i) { + const TX_TYPE tp = types[i]; + if (libaom_test::IsTxSizeTypeValid(sz, tp)) { + RunAV1InvTxfm2dTest(tp, sz, 1, bd, 1); + } + } + } + } +} + +TEST_P(AV1HighbdInvTxfm2d, DISABLED_Speed) { + int bitdepth_ar[2] = { 10, 12 }; + for (int k = 0; k < 2; ++k) { + int bd = bitdepth_ar[k]; + for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) { + for (int i = 0; i < (int)TX_TYPES; ++i) { + if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(j), + static_cast<TX_TYPE>(i))) { + RunAV1InvTxfm2dTest(static_cast<TX_TYPE>(i), static_cast<TX_SIZE>(j), + 1000000, bd); + } + } + } + } +} + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdInvTxfm2d, + ::testing::Values(av1_highbd_inv_txfm_add_sse4_1)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1HighbdInvTxfm2d, + ::testing::Values(av1_highbd_inv_txfm_add_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdInvTxfm2d, + ::testing::Values(av1_highbd_inv_txfm_add_neon)); +#endif + +} // namespace diff --git 
a/third_party/aom/test/av1_horz_only_frame_superres_test.cc b/third_party/aom/test/av1_horz_only_frame_superres_test.cc new file mode 100644 index 0000000000..e9cf02e202 --- /dev/null +++ b/third_party/aom/test/av1_horz_only_frame_superres_test.cc @@ -0,0 +1,385 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <tuple> +#include <vector> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/av1_rtcd.h" + +#include "aom_ports/aom_timer.h" +#include "av1/common/convolve.h" +#include "av1/common/resize.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +namespace { +const int kTestIters = 10; +const int kPerfIters = 1000; + +const int kVPad = 32; +const int kHPad = 32; + +using libaom_test::ACMRandom; +using std::make_tuple; +using std::tuple; + +template <typename Pixel> +class TestImage { + public: + TestImage(int w_src, int h, int superres_denom, int x0, int bd) + : w_src_(w_src), h_(h), superres_denom_(superres_denom), x0_(x0), + bd_(bd) { + assert(bd < 16); + assert(bd <= 8 * static_cast<int>(sizeof(Pixel))); + assert(9 <= superres_denom && superres_denom <= 16); + assert(SCALE_NUMERATOR == 8); + assert(0 <= x0_ && x0_ <= RS_SCALE_SUBPEL_MASK); + + w_dst_ = w_src_; + av1_calculate_unscaled_superres_size(&w_dst_, nullptr, superres_denom); + + src_stride_ = ALIGN_POWER_OF_TWO(w_src_ + 2 * kHPad, 4); + dst_stride_ = ALIGN_POWER_OF_TWO(w_dst_ + 2 * kHPad, 4); + + // Allocate image data 
+ src_data_.resize(2 * src_block_size()); + dst_data_.resize(2 * dst_block_size()); + } + + void Initialize(ACMRandom *rnd); + void Check() const; + + int src_stride() const { return src_stride_; } + int dst_stride() const { return dst_stride_; } + + int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); } + int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); } + + int src_width() const { return w_src_; } + int dst_width() const { return w_dst_; } + int height() const { return h_; } + int x0() const { return x0_; } + + const Pixel *GetSrcData(bool ref, bool borders) const { + const Pixel *block = &src_data_[ref ? 0 : src_block_size()]; + return borders ? block : block + kHPad + src_stride_ * kVPad; + } + + Pixel *GetDstData(bool ref, bool borders) { + Pixel *block = &dst_data_[ref ? 0 : dst_block_size()]; + return borders ? block : block + kHPad + dst_stride_ * kVPad; + } + + private: + int w_src_, w_dst_, h_, superres_denom_, x0_, bd_; + int src_stride_, dst_stride_; + + std::vector<Pixel> src_data_; + std::vector<Pixel> dst_data_; +}; + +template <typename Pixel> +void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) { + if (!trash) { + memset(data, 0, sizeof(*data) * num_pixels); + return; + } + const Pixel mask = (1 << bd) - 1; + for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask; +} + +template <typename Pixel> +void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd, + bool trash_edges, Pixel *data) { + assert(rnd); + const Pixel mask = (1 << bd) - 1; + + // Fill in the first buffer with random data + // Top border + FillEdge(rnd, stride * kVPad, bd, trash_edges, data); + for (int r = 0; r < h; ++r) { + Pixel *row_data = data + (kVPad + r) * stride; + // Left border, contents, right border + FillEdge(rnd, kHPad, bd, trash_edges, row_data); + for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask; + FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w); + } 
+ // Bottom border + FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h)); + + const int bpp = sizeof(*data); + const int block_elts = stride * (h + 2 * kVPad); + const int block_size = bpp * block_elts; + + // Now copy that to the second buffer + memcpy(data + block_elts, data, block_size); +} + +template <typename Pixel> +void TestImage<Pixel>::Initialize(ACMRandom *rnd) { + PrepBuffers(rnd, w_src_, h_, src_stride_, bd_, false, &src_data_[0]); + PrepBuffers(rnd, w_dst_, h_, dst_stride_, bd_, true, &dst_data_[0]); +} + +template <typename Pixel> +void TestImage<Pixel>::Check() const { + const int num_pixels = dst_block_size(); + const Pixel *ref_dst = &dst_data_[0]; + const Pixel *tst_dst = &dst_data_[num_pixels]; + + // If memcmp returns 0, there's nothing to do. + if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return; + + // Otherwise, iterate through the buffer looking for differences, *ignoring + // the edges* + const int stride = dst_stride_; + for (int r = kVPad; r < h_ + kVPad; ++r) { + for (int c = kVPad; c < w_dst_ + kHPad; ++c) { + const int32_t ref_value = ref_dst[r * stride + c]; + const int32_t tst_value = tst_dst[r * stride + c]; + + EXPECT_EQ(tst_value, ref_value) + << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad) + << ", superres_denom: " << superres_denom_ << ", height: " << h_ + << ", src_width: " << w_src_ << ", dst_width: " << w_dst_ + << ", x0: " << x0_; + } + } +} + +template <typename Pixel> +class ConvolveHorizRSTestBase : public ::testing::Test { + public: + ConvolveHorizRSTestBase() : image_(nullptr) {} + ~ConvolveHorizRSTestBase() override = default; + + // Implemented by subclasses (SetUp depends on the parameters passed + // in and RunOne depends on the function to be tested. 
These can't + // be templated for low/high bit depths because they have different + // numbers of parameters) + void SetUp() override = 0; + virtual void RunOne(bool ref) = 0; + + protected: + void SetBitDepth(int bd) { bd_ = bd; } + + void CorrectnessTest() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int i = 0; i < kTestIters; ++i) { + for (int superres_denom = 9; superres_denom <= 16; superres_denom++) { + // Get a random height between 512 and 767 + int height = rnd.Rand8() + 512; + + // Get a random src width between 128 and 383 + int width_src = rnd.Rand8() + 128; + + // x0 is normally calculated by get_upscale_convolve_x0 in + // av1/common/resize.c. However, this test should work for + // any value of x0 between 0 and RS_SCALE_SUBPEL_MASK + // (inclusive), so we choose one at random. + int x0 = rnd.Rand16() % (RS_SCALE_SUBPEL_MASK + 1); + + image_ = + new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_); + ASSERT_NE(image_, nullptr); + + Prep(&rnd); + RunOne(true); + RunOne(false); + image_->Check(); + + delete image_; + } + } + } + + void SpeedTest() { + // Pick some specific parameters to test + int height = 767; + int width_src = 129; + int superres_denom = 13; + int x0 = RS_SCALE_SUBPEL_MASK >> 1; + + image_ = new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_); + ASSERT_NE(image_, nullptr); + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + Prep(&rnd); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (int i = 0; i < kPerfIters; ++i) RunOne(true); + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (int i = 0; i < kPerfIters; ++i) RunOne(false); + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, 
tst_time) + << "Error: ConvolveHorizRSTest (Speed Test), SIMD slower than C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; + } + + void Prep(ACMRandom *rnd) { + assert(rnd); + image_->Initialize(rnd); + } + + int bd_; + TestImage<Pixel> *image_; +}; + +typedef void (*LowBDConvolveHorizRsFunc)(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int w, + int h, const int16_t *x_filters, + const int x0_qn, const int x_step_qn); + +// Test parameter list: +// <tst_fun_> +typedef tuple<LowBDConvolveHorizRsFunc> LowBDParams; + +class LowBDConvolveHorizRSTest + : public ConvolveHorizRSTestBase<uint8_t>, + public ::testing::WithParamInterface<LowBDParams> { + public: + ~LowBDConvolveHorizRSTest() override = default; + + void SetUp() override { + tst_fun_ = GET_PARAM(0); + const int bd = 8; + SetBitDepth(bd); + } + + void RunOne(bool ref) override { + const uint8_t *src = image_->GetSrcData(ref, false); + uint8_t *dst = image_->GetDstData(ref, false); + const int src_stride = image_->src_stride(); + const int dst_stride = image_->dst_stride(); + const int width_src = image_->src_width(); + const int width_dst = image_->dst_width(); + const int height = image_->height(); + const int x0_qn = image_->x0(); + + const int32_t x_step_qn = + av1_get_upscale_convolve_step(width_src, width_dst); + + if (ref) { + av1_convolve_horiz_rs_c(src, src_stride, dst, dst_stride, width_dst, + height, &av1_resize_filter_normative[0][0], x0_qn, + x_step_qn); + } else { + tst_fun_(src, src_stride, dst, dst_stride, width_dst, height, + &av1_resize_filter_normative[0][0], x0_qn, x_step_qn); + } + } + + private: + LowBDConvolveHorizRsFunc tst_fun_; +}; + +TEST_P(LowBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); } +TEST_P(LowBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P(C, LowBDConvolveHorizRSTest, + ::testing::Values(av1_convolve_horiz_rs_c)); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P(SSE4_1, 
LowBDConvolveHorizRSTest, + ::testing::Values(av1_convolve_horiz_rs_sse4_1)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +typedef void (*HighBDConvolveHorizRsFunc)(const uint16_t *src, int src_stride, + uint16_t *dst, int dst_stride, int w, + int h, const int16_t *x_filters, + const int x0_qn, const int x_step_qn, + int bd); + +// Test parameter list: +// <tst_fun_, bd_> +typedef tuple<HighBDConvolveHorizRsFunc, int> HighBDParams; + +class HighBDConvolveHorizRSTest + : public ConvolveHorizRSTestBase<uint16_t>, + public ::testing::WithParamInterface<HighBDParams> { + public: + ~HighBDConvolveHorizRSTest() override = default; + + void SetUp() override { + tst_fun_ = GET_PARAM(0); + const int bd = GET_PARAM(1); + SetBitDepth(bd); + } + + void RunOne(bool ref) override { + const uint16_t *src = image_->GetSrcData(ref, false); + uint16_t *dst = image_->GetDstData(ref, false); + const int src_stride = image_->src_stride(); + const int dst_stride = image_->dst_stride(); + const int width_src = image_->src_width(); + const int width_dst = image_->dst_width(); + const int height = image_->height(); + const int x0_qn = image_->x0(); + + const int32_t x_step_qn = + av1_get_upscale_convolve_step(width_src, width_dst); + + if (ref) { + av1_highbd_convolve_horiz_rs_c( + src, src_stride, dst, dst_stride, width_dst, height, + &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_); + } else { + tst_fun_(src, src_stride, dst, dst_stride, width_dst, height, + &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_); + } + } + + private: + HighBDConvolveHorizRsFunc tst_fun_; +}; + +const int kBDs[] = { 8, 10, 12 }; + +TEST_P(HighBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); } +TEST_P(HighBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); } + +INSTANTIATE_TEST_SUITE_P( + C, HighBDConvolveHorizRSTest, + ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_c), + ::testing::ValuesIn(kBDs))); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, 
HighBDConvolveHorizRSTest, + ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_sse4_1), + ::testing::ValuesIn(kBDs))); +#endif // HAVE_SSE4_1 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, HighBDConvolveHorizRSTest, + ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_neon), + ::testing::ValuesIn(kBDs))); +#endif // HAVE_NEON + +#endif // CONFIG_AV1_HIGHBITDEPTH + +} // namespace diff --git a/third_party/aom/test/av1_inv_txfm1d_test.cc b/third_party/aom/test/av1_inv_txfm1d_test.cc new file mode 100644 index 0000000000..e70b22a35a --- /dev/null +++ b/third_party/aom/test/av1_inv_txfm1d_test.cc @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <math.h> + +#include "test/av1_txfm_test.h" +#include "test/util.h" +#include "av1/common/av1_inv_txfm1d.h" +#include "av1/encoder/av1_fwd_txfm1d.h" + +typedef TX_SIZE TxSize; + +using libaom_test::ACMRandom; +using libaom_test::input_base; + +namespace { +const int txfm_type_num = 2; +const int txfm_size_ls[] = { 4, 8, 16, 32, 64 }; + +const TxfmFunc fwd_txfm_func_ls[][txfm_type_num] = { + { av1_fdct4, av1_fadst4 }, { av1_fdct8, av1_fadst8 }, + { av1_fdct16, av1_fadst16 }, { av1_fdct32, nullptr }, + { av1_fdct64, nullptr }, +}; + +const TxfmFunc inv_txfm_func_ls[][txfm_type_num] = { + { av1_idct4, av1_iadst4 }, { av1_idct8, av1_iadst8 }, + { av1_idct16, av1_iadst16 }, { av1_idct32, nullptr }, + { av1_idct64, nullptr }, +}; + +// the maximum stage number of fwd/inv 1d dct/adst txfm is 12 +const int8_t cos_bit = 13; +const int8_t range_bit[12] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 }; + +void reference_idct_1d_int(const int32_t *in, int32_t *out, int size) { + double input[64]; + for (int i = 0; i < size; ++i) input[i] = in[i]; + + double output[64]; + libaom_test::reference_idct_1d(input, output, size); + + for (int i = 0; i < size; ++i) { + ASSERT_GE(output[i], INT32_MIN); + ASSERT_LE(output[i], INT32_MAX); + out[i] = static_cast<int32_t>(round(output[i])); + } +} + +void random_matrix(int32_t *dst, int len, ACMRandom *rnd) { + const int bits = 16; + const int maxVal = (1 << (bits - 1)) - 1; + const int minVal = -(1 << (bits - 1)); + for (int i = 0; i < len; ++i) { + if (rnd->Rand8() % 10) + dst[i] = minVal + rnd->Rand16() % (1 << bits); + else + dst[i] = rnd->Rand8() % 2 ? 
minVal : maxVal; + } +} + +TEST(av1_inv_txfm1d, InvAccuracyCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 20000; + const int max_error[] = { 6, 10, 19, 31, 40 }; + ASSERT_EQ(NELEMENTS(max_error), TX_SIZES); + ASSERT_EQ(NELEMENTS(inv_txfm_func_ls), TX_SIZES); + for (int i = 0; i < count_test_block; ++i) { + // choose a random transform to test + const TxSize tx_size = static_cast<TxSize>(rnd.Rand8() % TX_SIZES); + const int txfm_size = txfm_size_ls[tx_size]; + const TxfmFunc inv_txfm_func = inv_txfm_func_ls[tx_size][0]; + + int32_t input[64]; + random_matrix(input, txfm_size, &rnd); + + // 64x64 transform assumes last 32 values are zero. + memset(input + 32, 0, 32 * sizeof(input[0])); + + int32_t ref_output[64]; + memset(ref_output, 0, sizeof(ref_output)); + reference_idct_1d_int(input, ref_output, txfm_size); + + int32_t output[64]; + memset(output, 0, sizeof(output)); + inv_txfm_func(input, output, cos_bit, range_bit); + + for (int ni = 0; ni < txfm_size; ++ni) { + EXPECT_LE(abs(output[ni] - ref_output[ni]), max_error[tx_size]) + << "tx_size = " << tx_size << ", ni = " << ni + << ", output[ni] = " << output[ni] + << ", ref_output[ni] = " << ref_output[ni]; + } + } +} + +static INLINE int get_max_bit(int x) { + int max_bit = -1; + while (x) { + x = x >> 1; + max_bit++; + } + return max_bit; +} + +TEST(av1_inv_txfm1d, get_max_bit) { + int max_bit = get_max_bit(8); + EXPECT_EQ(max_bit, 3); +} + +TEST(av1_inv_txfm1d, round_trip) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int si = 0; si < NELEMENTS(fwd_txfm_func_ls); ++si) { + int txfm_size = txfm_size_ls[si]; + + for (int ti = 0; ti < txfm_type_num; ++ti) { + TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[si][ti]; + TxfmFunc inv_txfm_func = inv_txfm_func_ls[si][ti]; + int max_error = 2; + + if (!fwd_txfm_func) continue; + + const int count_test_block = 5000; + for (int i = 0; i < count_test_block; ++i) { + int32_t input[64]; + int32_t output[64]; + int32_t 
round_trip_output[64]; + + ASSERT_LE(txfm_size, NELEMENTS(input)); + + for (int ni = 0; ni < txfm_size; ++ni) { + input[ni] = rnd.Rand16() % input_base - rnd.Rand16() % input_base; + } + + fwd_txfm_func(input, output, cos_bit, range_bit); + inv_txfm_func(output, round_trip_output, cos_bit, range_bit); + + for (int ni = 0; ni < txfm_size; ++ni) { + int node_err = + abs(input[ni] - round_shift(round_trip_output[ni], + get_max_bit(txfm_size) - 1)); + EXPECT_LE(node_err, max_error); + } + } + } + } +} + +} // namespace diff --git a/third_party/aom/test/av1_inv_txfm2d_test.cc b/third_party/aom/test/av1_inv_txfm2d_test.cc new file mode 100644 index 0000000000..35a87a43b8 --- /dev/null +++ b/third_party/aom/test/av1_inv_txfm2d_test.cc @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <tuple> +#include <vector> + +#include "config/av1_rtcd.h" + +#include "aom_ports/aom_timer.h" +#include "av1/common/av1_inv_txfm1d_cfg.h" +#include "av1/common/scan.h" +#include "test/acm_random.h" +#include "test/av1_txfm_test.h" +#include "test/util.h" + +using libaom_test::ACMRandom; +using libaom_test::bd; +using libaom_test::compute_avg_abs_error; +using libaom_test::input_base; +using libaom_test::InvTxfm2dFunc; +using libaom_test::LbdInvTxfm2dFunc; +using libaom_test::tx_type_name; + +using ::testing::Combine; +using ::testing::Range; +using ::testing::Values; + +using std::vector; + +typedef TX_TYPE TxType; +typedef TX_SIZE TxSize; + +namespace { + +// AV1InvTxfm2dParam argument list: +// tx_type_, tx_size_, max_error_, max_avg_error_ +typedef std::tuple<TxType, TxSize, int, double> AV1InvTxfm2dParam; + +class AV1InvTxfm2d : public ::testing::TestWithParam<AV1InvTxfm2dParam> { + public: + void SetUp() override { + tx_type_ = GET_PARAM(0); + tx_size_ = GET_PARAM(1); + max_error_ = GET_PARAM(2); + max_avg_error_ = GET_PARAM(3); + } + + void RunRoundtripCheck() { + int tx_w = tx_size_wide[tx_size_]; + int tx_h = tx_size_high[tx_size_]; + int txfm2d_size = tx_w * tx_h; + const FwdTxfm2dFunc fwd_txfm_func = libaom_test::fwd_txfm_func_ls[tx_size_]; + const InvTxfm2dFunc inv_txfm_func = libaom_test::inv_txfm_func_ls[tx_size_]; + double avg_abs_error = 0; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + const int count = 500; + + for (int ci = 0; ci < count; ci++) { + DECLARE_ALIGNED(16, int16_t, input[64 * 64]) = { 0 }; + ASSERT_LE(txfm2d_size, NELEMENTS(input)); + + for (int ni = 0; ni < txfm2d_size; ++ni) { + if (ci == 0) { + int extreme_input = input_base - 1; + input[ni] = extreme_input; // extreme case + } else { + input[ni] = rnd.Rand16() % input_base; + } + } + + DECLARE_ALIGNED(16, uint16_t, expected[64 * 64]) = { 0 }; + ASSERT_LE(txfm2d_size, NELEMENTS(expected)); + if 
(TxfmUsesApproximation()) { + // Compare reference forward HT + inverse HT vs forward HT + inverse HT. + double ref_input[64 * 64]; + ASSERT_LE(txfm2d_size, NELEMENTS(ref_input)); + for (int ni = 0; ni < txfm2d_size; ++ni) { + ref_input[ni] = input[ni]; + } + double ref_coeffs[64 * 64] = { 0 }; + ASSERT_LE(txfm2d_size, NELEMENTS(ref_coeffs)); + ASSERT_EQ(tx_type_, static_cast<TxType>(DCT_DCT)); + libaom_test::reference_hybrid_2d(ref_input, ref_coeffs, tx_type_, + tx_size_); + DECLARE_ALIGNED(16, int32_t, ref_coeffs_int[64 * 64]) = { 0 }; + ASSERT_LE(txfm2d_size, NELEMENTS(ref_coeffs_int)); + for (int ni = 0; ni < txfm2d_size; ++ni) { + ref_coeffs_int[ni] = (int32_t)round(ref_coeffs[ni]); + } + inv_txfm_func(ref_coeffs_int, expected, tx_w, tx_type_, bd); + } else { + // Compare original input vs forward HT + inverse HT. + for (int ni = 0; ni < txfm2d_size; ++ni) { + expected[ni] = input[ni]; + } + } + + DECLARE_ALIGNED(16, int32_t, coeffs[64 * 64]) = { 0 }; + ASSERT_LE(txfm2d_size, NELEMENTS(coeffs)); + fwd_txfm_func(input, coeffs, tx_w, tx_type_, bd); + + DECLARE_ALIGNED(16, uint16_t, actual[64 * 64]) = { 0 }; + ASSERT_LE(txfm2d_size, NELEMENTS(actual)); + inv_txfm_func(coeffs, actual, tx_w, tx_type_, bd); + + double actual_max_error = 0; + for (int ni = 0; ni < txfm2d_size; ++ni) { + const double this_error = abs(expected[ni] - actual[ni]); + actual_max_error = AOMMAX(actual_max_error, this_error); + } + EXPECT_GE(max_error_, actual_max_error) + << " tx_w: " << tx_w << " tx_h " << tx_h + << " tx_type: " << tx_type_name[tx_type_]; + if (actual_max_error > max_error_) { // exit early. 
+ break; + } + avg_abs_error += compute_avg_abs_error<uint16_t, uint16_t>( + expected, actual, txfm2d_size); + } + + avg_abs_error /= count; + EXPECT_GE(max_avg_error_, avg_abs_error) + << " tx_w: " << tx_w << " tx_h " << tx_h + << " tx_type: " << tx_type_name[tx_type_]; + } + + private: + bool TxfmUsesApproximation() { + if (tx_size_wide[tx_size_] == 64 || tx_size_high[tx_size_] == 64) { + return true; + } + return false; + } + + int max_error_; + double max_avg_error_; + TxType tx_type_; + TxSize tx_size_; +}; + +static int max_error_ls[TX_SIZES_ALL] = { + 2, // 4x4 transform + 2, // 8x8 transform + 2, // 16x16 transform + 4, // 32x32 transform + 3, // 64x64 transform + 2, // 4x8 transform + 2, // 8x4 transform + 2, // 8x16 transform + 2, // 16x8 transform + 3, // 16x32 transform + 3, // 32x16 transform + 5, // 32x64 transform + 5, // 64x32 transform + 2, // 4x16 transform + 2, // 16x4 transform + 2, // 8x32 transform + 2, // 32x8 transform + 3, // 16x64 transform + 3, // 64x16 transform +}; + +static double avg_error_ls[TX_SIZES_ALL] = { + 0.002, // 4x4 transform + 0.05, // 8x8 transform + 0.07, // 16x16 transform + 0.4, // 32x32 transform + 0.3, // 64x64 transform + 0.02, // 4x8 transform + 0.02, // 8x4 transform + 0.04, // 8x16 transform + 0.07, // 16x8 transform + 0.4, // 16x32 transform + 0.5, // 32x16 transform + 0.38, // 32x64 transform + 0.39, // 64x32 transform + 0.2, // 4x16 transform + 0.2, // 16x4 transform + 0.2, // 8x32 transform + 0.2, // 32x8 transform + 0.38, // 16x64 transform + 0.38, // 64x16 transform +}; + +vector<AV1InvTxfm2dParam> GetInvTxfm2dParamList() { + vector<AV1InvTxfm2dParam> param_list; + for (int s = 0; s < TX_SIZES; ++s) { + const int max_error = max_error_ls[s]; + const double avg_error = avg_error_ls[s]; + for (int t = 0; t < TX_TYPES; ++t) { + const TxType tx_type = static_cast<TxType>(t); + const TxSize tx_size = static_cast<TxSize>(s); + if (libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) { + param_list.push_back( + 
AV1InvTxfm2dParam(tx_type, tx_size, max_error, avg_error)); + } + } + } + return param_list; +} + +INSTANTIATE_TEST_SUITE_P(C, AV1InvTxfm2d, + ::testing::ValuesIn(GetInvTxfm2dParamList())); + +TEST_P(AV1InvTxfm2d, RunRoundtripCheck) { RunRoundtripCheck(); } + +TEST(AV1InvTxfm2d, CfgTest) { + for (int bd_idx = 0; bd_idx < BD_NUM; ++bd_idx) { + int bd = libaom_test::bd_arr[bd_idx]; + int8_t low_range = libaom_test::low_range_arr[bd_idx]; + int8_t high_range = libaom_test::high_range_arr[bd_idx]; + for (int tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) { + for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) { + if (libaom_test::IsTxSizeTypeValid(static_cast<TxSize>(tx_size), + static_cast<TxType>(tx_type)) == + false) { + continue; + } + TXFM_2D_FLIP_CFG cfg; + av1_get_inv_txfm_cfg(static_cast<TxType>(tx_type), + static_cast<TxSize>(tx_size), &cfg); + int8_t stage_range_col[MAX_TXFM_STAGE_NUM]; + int8_t stage_range_row[MAX_TXFM_STAGE_NUM]; + av1_gen_inv_stage_range(stage_range_col, stage_range_row, &cfg, + static_cast<TxSize>(tx_size), bd); + libaom_test::txfm_stage_range_check(stage_range_col, cfg.stage_num_col, + cfg.cos_bit_col, low_range, + high_range); + libaom_test::txfm_stage_range_check(stage_range_row, cfg.stage_num_row, + cfg.cos_bit_row, low_range, + high_range); + } + } + } +} + +typedef std::tuple<const LbdInvTxfm2dFunc> AV1LbdInvTxfm2dParam; +class AV1LbdInvTxfm2d : public ::testing::TestWithParam<AV1LbdInvTxfm2dParam> { + public: + void SetUp() override { target_func_ = GET_PARAM(0); } + void RunAV1InvTxfm2dTest(TxType tx_type, TxSize tx_size, int run_times, + int gt_int16 = 0); + + private: + LbdInvTxfm2dFunc target_func_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1LbdInvTxfm2d); + +void AV1LbdInvTxfm2d::RunAV1InvTxfm2dTest(TxType tx_type, TxSize tx_size, + int run_times, int gt_int16) { + FwdTxfm2dFunc fwd_func_ = libaom_test::fwd_txfm_func_ls[tx_size]; + InvTxfm2dFunc ref_func_ = libaom_test::inv_txfm_func_ls[tx_size]; + if (fwd_func_ 
== nullptr || ref_func_ == nullptr || target_func_ == nullptr) { + return; + } + const int bd = 8; + const int BLK_WIDTH = 64; + const int BLK_SIZE = BLK_WIDTH * BLK_WIDTH; + DECLARE_ALIGNED(16, int16_t, input[BLK_SIZE]) = { 0 }; + DECLARE_ALIGNED(32, int32_t, inv_input[BLK_SIZE]) = { 0 }; + DECLARE_ALIGNED(16, uint8_t, output[BLK_SIZE]) = { 0 }; + DECLARE_ALIGNED(16, uint16_t, ref_output[BLK_SIZE]) = { 0 }; + int stride = BLK_WIDTH; + int rows = tx_size_high[tx_size]; + int cols = tx_size_wide[tx_size]; + const int rows_nonezero = AOMMIN(32, rows); + const int cols_nonezero = AOMMIN(32, cols); + run_times /= (rows * cols); + run_times = AOMMAX(1, run_times); + const SCAN_ORDER *scan_order = get_default_scan(tx_size, tx_type); + const int16_t *scan = scan_order->scan; + const int16_t eobmax = rows_nonezero * cols_nonezero; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int randTimes = run_times == 1 ? (eobmax + 500) : 1; + + for (int cnt = 0; cnt < randTimes; ++cnt) { + const int16_t max_in = (1 << (bd)) - 1; + for (int r = 0; r < BLK_WIDTH; ++r) { + for (int c = 0; c < BLK_WIDTH; ++c) { + input[r * cols + c] = (cnt == 0) ? max_in : rnd.Rand8Extremes(); + output[r * stride + c] = (cnt == 0) ? 
128 : rnd.Rand8(); + ref_output[r * stride + c] = output[r * stride + c]; + } + } + fwd_func_(input, inv_input, stride, tx_type, bd); + + // produce eob input by setting high freq coeffs to zero + const int eob = AOMMIN(cnt + 1, eobmax); + for (int i = eob; i < eobmax; i++) { + inv_input[scan[i]] = 0; + } + if (gt_int16) { + inv_input[scan[eob - 1]] = ((int32_t)INT16_MAX * 100 / 141); + } + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + ref_func_(inv_input, ref_output, stride, tx_type, bd); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + target_func_(inv_input, output, stride, tx_type, tx_size, eob); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + if (run_times > 10) { + printf("txfm[%d] %3dx%-3d:%7.2f/%7.2fns", tx_type, cols, rows, time1, + time2); + printf("(%3.2f)\n", time1 / time2); + } + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + uint8_t ref_value = static_cast<uint8_t>(ref_output[r * stride + c]); + if (ref_value != output[r * stride + c]) { + printf(" "); + } + ASSERT_EQ(ref_value, output[r * stride + c]) + << "[" << r << "," << c << "] " << cnt << " tx_size: " << cols + << "x" << rows << " tx_type: " << tx_type_name[tx_type] << " eob " + << eob; + } + } + } +} + +TEST_P(AV1LbdInvTxfm2d, match) { + for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) { + for (int i = 0; i < (int)TX_TYPES; ++i) { + if (libaom_test::IsTxSizeTypeValid(static_cast<TxSize>(j), + static_cast<TxType>(i))) { + RunAV1InvTxfm2dTest(static_cast<TxType>(i), static_cast<TxSize>(j), 1); + } + } + } +} + +TEST_P(AV1LbdInvTxfm2d, gt_int16) { + static const TxType types[] = { DCT_DCT, ADST_DCT, FLIPADST_DCT, IDTX, + V_DCT, H_DCT, H_ADST, H_FLIPADST }; + for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) { + const TxSize sz = 
static_cast<TxSize>(j); + for (uint8_t i = 0; i < sizeof(types) / sizeof(types[0]); ++i) { + const TxType tp = types[i]; + if (libaom_test::IsTxSizeTypeValid(sz, tp)) { + RunAV1InvTxfm2dTest(tp, sz, 1, 1); + } + } + } +} + +TEST_P(AV1LbdInvTxfm2d, DISABLED_Speed) { + for (int j = 1; j < (int)(TX_SIZES_ALL); ++j) { + for (int i = 0; i < (int)TX_TYPES; ++i) { + if (libaom_test::IsTxSizeTypeValid(static_cast<TxSize>(j), + static_cast<TxType>(i))) { + RunAV1InvTxfm2dTest(static_cast<TxType>(i), static_cast<TxSize>(j), + 10000000); + } + } + } +} + +#if HAVE_SSSE3 +extern "C" void av1_lowbd_inv_txfm2d_add_ssse3(const int32_t *input, + uint8_t *output, int stride, + TxType tx_type, TxSize tx_size, + int eob); +INSTANTIATE_TEST_SUITE_P(SSSE3, AV1LbdInvTxfm2d, + ::testing::Values(av1_lowbd_inv_txfm2d_add_ssse3)); +#endif // HAVE_SSSE3 + +#if HAVE_AVX2 +extern "C" void av1_lowbd_inv_txfm2d_add_avx2(const int32_t *input, + uint8_t *output, int stride, + TxType tx_type, TxSize tx_size, + int eob); + +INSTANTIATE_TEST_SUITE_P(AVX2, AV1LbdInvTxfm2d, + ::testing::Values(av1_lowbd_inv_txfm2d_add_avx2)); +#endif // HAVE_AVX2 + +#if HAVE_NEON +extern "C" void av1_lowbd_inv_txfm2d_add_neon(const int32_t *input, + uint8_t *output, int stride, + TX_TYPE tx_type, TX_SIZE tx_size, + int eob); + +INSTANTIATE_TEST_SUITE_P(NEON, AV1LbdInvTxfm2d, + ::testing::Values(av1_lowbd_inv_txfm2d_add_neon)); +#endif // HAVE_NEON + +} // namespace diff --git a/third_party/aom/test/av1_k_means_test.cc b/third_party/aom/test/av1_k_means_test.cc new file mode 100644 index 0000000000..7e66a8e01d --- /dev/null +++ b/third_party/aom/test/av1_k_means_test.cc @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2020, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <cstdlib> +#include <new> +#include <tuple> + +#include "config/aom_config.h" +#include "config/av1_rtcd.h" + +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_mem/aom_mem.h" +#include "aom_ports/aom_timer.h" +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "av1/encoder/palette.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace AV1Kmeans { +typedef void (*av1_calc_indices_dim1_func)(const int16_t *data, + const int16_t *centroids, + uint8_t *indices, + int64_t *total_dist, int n, int k); +typedef void (*av1_calc_indices_dim2_func)(const int16_t *data, + const int16_t *centroids, + uint8_t *indices, + int64_t *total_dist, int n, int k); + +typedef std::tuple<av1_calc_indices_dim1_func, BLOCK_SIZE> + av1_calc_indices_dim1Param; + +typedef std::tuple<av1_calc_indices_dim2_func, BLOCK_SIZE> + av1_calc_indices_dim2Param; + +class AV1KmeansTest1 + : public ::testing::TestWithParam<av1_calc_indices_dim1Param> { + public: + ~AV1KmeansTest1() override; + void SetUp() override; + + protected: + void RunCheckOutput(av1_calc_indices_dim1_func test_impl, BLOCK_SIZE bsize, + int centroids); + void RunSpeedTest(av1_calc_indices_dim1_func test_impl, BLOCK_SIZE bsize, + int centroids); + bool CheckResult(int n) { + for (int idx = 0; idx < n; ++idx) { + if (indices1_[idx] != indices2_[idx]) { + printf("%d ", idx); + printf("%d != %d ", indices1_[idx], indices2_[idx]); + return false; + } + } + return true; + } + + libaom_test::ACMRandom rnd_; + int16_t data_[4096]; + int16_t centroids_[8]; + uint8_t 
indices1_[4096]; + uint8_t indices2_[4096]; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1KmeansTest1); + +AV1KmeansTest1::~AV1KmeansTest1() = default; + +void AV1KmeansTest1::SetUp() { + rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); + for (int i = 0; i < 4096; ++i) { + data_[i] = (int)rnd_.Rand8() << 4; + } + for (int i = 0; i < 8; i++) { + centroids_[i] = (int)rnd_.Rand8() << 4; + } +} + +void AV1KmeansTest1::RunCheckOutput(av1_calc_indices_dim1_func test_impl, + BLOCK_SIZE bsize, int k) { + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int n = w * h; + int64_t total_dist_dim1, total_dist_impl; + av1_calc_indices_dim1_c(data_, centroids_, indices1_, &total_dist_dim1, n, k); + test_impl(data_, centroids_, indices2_, &total_dist_impl, n, k); + + ASSERT_EQ(total_dist_dim1, total_dist_impl); + ASSERT_EQ(CheckResult(n), true) + << " block " << bsize << " index " << n << " Centroids " << k; +} + +void AV1KmeansTest1::RunSpeedTest(av1_calc_indices_dim1_func test_impl, + BLOCK_SIZE bsize, int k) { + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int n = w * h; + const int num_loops = 1000000000 / n; + + av1_calc_indices_dim1_func funcs[2] = { av1_calc_indices_dim1_c, test_impl }; + double elapsed_time[2] = { 0 }; + for (int i = 0; i < 2; ++i) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + av1_calc_indices_dim1_func func = funcs[i]; + for (int j = 0; j < num_loops; ++j) { + func(data_, centroids_, indices1_, /*total_dist=*/nullptr, n, k); + } + aom_usec_timer_mark(&timer); + double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); + elapsed_time[i] = 1000.0 * time / num_loops; + } + printf("av1_calc_indices_dim1 indices= %d centroids=%d: %7.2f/%7.2fns", n, k, + elapsed_time[0], elapsed_time[1]); + printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); +} + +TEST_P(AV1KmeansTest1, CheckOutput) { + // centroids = 2..8 + RunCheckOutput(GET_PARAM(0), 
GET_PARAM(1), 2); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 3); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 4); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 5); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 6); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 7); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 8); +} + +TEST_P(AV1KmeansTest1, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 2); + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 3); + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 4); + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 5); + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 6); + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 7); + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 8); +} + +class AV1KmeansTest2 + : public ::testing::TestWithParam<av1_calc_indices_dim2Param> { + public: + ~AV1KmeansTest2() override; + void SetUp() override; + + protected: + void RunCheckOutput(av1_calc_indices_dim2_func test_impl, BLOCK_SIZE bsize, + int centroids); + void RunSpeedTest(av1_calc_indices_dim2_func test_impl, BLOCK_SIZE bsize, + int centroids); + bool CheckResult(int n) { + bool flag = true; + for (int idx = 0; idx < n; ++idx) { + if (indices1_[idx] != indices2_[idx]) { + printf("%d ", idx); + printf("%d != %d ", indices1_[idx], indices2_[idx]); + flag = false; + } + } + if (flag == false) { + return false; + } + return true; + } + + libaom_test::ACMRandom rnd_; + int16_t data_[4096 * 2]; + int16_t centroids_[8 * 2]; + uint8_t indices1_[4096]; + uint8_t indices2_[4096]; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1KmeansTest2); + +AV1KmeansTest2::~AV1KmeansTest2() = default; + +void AV1KmeansTest2::SetUp() { + rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); + for (int i = 0; i < 4096 * 2; ++i) { + data_[i] = (int)rnd_.Rand8(); + } + for (int i = 0; i < 8 * 2; i++) { + centroids_[i] = (int)rnd_.Rand8(); + } +} + +void AV1KmeansTest2::RunCheckOutput(av1_calc_indices_dim2_func test_impl, + BLOCK_SIZE bsize, int k) { + const int w = block_size_wide[bsize]; + 
const int h = block_size_high[bsize]; + const int n = w * h; + int64_t total_dist_dim2, total_dist_impl; + av1_calc_indices_dim2_c(data_, centroids_, indices1_, &total_dist_dim2, n, k); + test_impl(data_, centroids_, indices2_, &total_dist_impl, n, k); + + ASSERT_EQ(total_dist_dim2, total_dist_impl); + ASSERT_EQ(CheckResult(n), true) + << " block " << bsize << " index " << n << " Centroids " << k; +} + +void AV1KmeansTest2::RunSpeedTest(av1_calc_indices_dim2_func test_impl, + BLOCK_SIZE bsize, int k) { + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int n = w * h; + const int num_loops = 1000000000 / n; + + av1_calc_indices_dim2_func funcs[2] = { av1_calc_indices_dim2_c, test_impl }; + double elapsed_time[2] = { 0 }; + for (int i = 0; i < 2; ++i) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + av1_calc_indices_dim2_func func = funcs[i]; + for (int j = 0; j < num_loops; ++j) { + func(data_, centroids_, indices1_, /*total_dist=*/nullptr, n, k); + } + aom_usec_timer_mark(&timer); + double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); + elapsed_time[i] = 1000.0 * time / num_loops; + } + printf("av1_calc_indices_dim2 indices= %d centroids=%d: %7.2f/%7.2fns", n, k, + elapsed_time[0], elapsed_time[1]); + printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); +} + +TEST_P(AV1KmeansTest2, CheckOutput) { + // centroids = 2..8 + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 2); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 3); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 4); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 5); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 6); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 7); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 8); +} + +TEST_P(AV1KmeansTest2, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 2); + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 3); + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 4); + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 5); + 
RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 6); + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 7); + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 8); +} + +#if HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON +const BLOCK_SIZE kValidBlockSize[] = { BLOCK_8X8, BLOCK_8X16, BLOCK_8X32, + BLOCK_16X8, BLOCK_16X16, BLOCK_16X32, + BLOCK_32X8, BLOCK_32X16, BLOCK_32X32, + BLOCK_32X64, BLOCK_64X32, BLOCK_64X64, + BLOCK_16X64, BLOCK_64X16 }; +#endif + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, AV1KmeansTest1, + ::testing::Combine(::testing::Values(&av1_calc_indices_dim1_sse2), + ::testing::ValuesIn(kValidBlockSize))); +INSTANTIATE_TEST_SUITE_P( + SSE2, AV1KmeansTest2, + ::testing::Combine(::testing::Values(&av1_calc_indices_dim2_sse2), + ::testing::ValuesIn(kValidBlockSize))); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1KmeansTest1, + ::testing::Combine(::testing::Values(&av1_calc_indices_dim1_avx2), + ::testing::ValuesIn(kValidBlockSize))); +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1KmeansTest2, + ::testing::Combine(::testing::Values(&av1_calc_indices_dim2_avx2), + ::testing::ValuesIn(kValidBlockSize))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1KmeansTest1, + ::testing::Combine(::testing::Values(&av1_calc_indices_dim1_neon), + ::testing::ValuesIn(kValidBlockSize))); +INSTANTIATE_TEST_SUITE_P( + NEON, AV1KmeansTest2, + ::testing::Combine(::testing::Values(&av1_calc_indices_dim2_neon), + ::testing::ValuesIn(kValidBlockSize))); +#endif + +} // namespace AV1Kmeans diff --git a/third_party/aom/test/av1_key_value_api_test.cc b/third_party/aom/test/av1_key_value_api_test.cc new file mode 100644 index 0000000000..a5734f6beb --- /dev/null +++ b/third_party/aom/test/av1_key_value_api_test.cc @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <cstring> +#include <tuple> + +#include "aom/aom_codec.h" +#include "aom/aom_decoder.h" +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "aom/aomdx.h" +#include "config/aom_config.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { +typedef std::tuple<const char *, const char *> KeyValParam; + +class BaseKeyValAPI : public testing::Test { + public: + void SetUp() override { +#if CONFIG_AV1_ENCODER + aom_codec_iface_t *iface_cx = aom_codec_av1_cx(); + aom_codec_enc_cfg_t enc_cfg; +#if CONFIG_REALTIME_ONLY + const int usage = 1; +#else + const int usage = 0; +#endif + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface_cx, &enc_cfg, usage)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_init(&enc_, iface_cx, &enc_cfg, usage)); +#endif +#if CONFIG_AV1_DECODER + aom_codec_iface_t *iface_dx = aom_codec_av1_dx(); + aom_codec_dec_cfg_t dec_cfg = { 0, 0, 0, !FORCE_HIGHBITDEPTH_DECODING }; + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_dec_init(&dec_, iface_dx, &dec_cfg, 0)); +#endif + } + + void TearDown() override { +#if CONFIG_AV1_ENCODER + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc_)); +#endif +#if CONFIG_AV1_DECODER + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&dec_)); +#endif + } + + protected: +#if CONFIG_AV1_ENCODER + aom_codec_ctx_t enc_; +#endif +#if CONFIG_AV1_DECODER + aom_codec_ctx_t dec_; +#endif +}; + +// Tests on encoder options. +// Need to add ones for the decoder in the future if it is also supported in the +// key & value API. 
+#if CONFIG_AV1_ENCODER +class EncValidTest : public BaseKeyValAPI, + public testing::WithParamInterface<KeyValParam> {}; +class EncInvalidTest : public BaseKeyValAPI, + public testing::WithParamInterface<KeyValParam> {}; + +TEST_P(EncValidTest, Valid) { + const char *key = std::get<0>(GetParam()); + const char *val = std::get<1>(GetParam()); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_set_option(&enc_, key, val)); +} + +TEST_P(EncInvalidTest, NullArg) { + const char *key = std::get<0>(GetParam()); + const char *val = std::get<1>(GetParam()); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_set_option(nullptr, key, val)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_set_option(&enc_, nullptr, val)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_set_option(&enc_, key, nullptr)); +} + +TEST_P(EncInvalidTest, InvalidParam) { + const char *key = std::get<0>(GetParam()); + const char *val = std::get<1>(GetParam()); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_set_option(&enc_, key, val)); + ASSERT_NE(aom_codec_error_detail(&enc_), nullptr); + EXPECT_GT(strlen(aom_codec_error_detail(&enc_)), 0u); +} + +// No test for ratio / list for now since the API does not support any of the +// parameters of these type. +// The string type typically involves reading a path/file, which brings +// potential fails. 
+const KeyValParam enc_valid_params[] = { + std::make_tuple("auto-intra-tools-off", "1"), // uint + std::make_tuple("min-gf-interval", "10"), // uint + std::make_tuple("min-partition-size", "4"), // int + std::make_tuple("tune", "psnr"), // enum +}; + +const KeyValParam enc_invalid_params[] = { + // no match + std::make_tuple("a-b-c", "10"), + // uint + std::make_tuple("min-gf-interval", "-1"), + std::make_tuple("min-gf-interval", "1.1"), + std::make_tuple("min-gf-interval", "abc"), + // int + std::make_tuple("min-partition-size", "1.1"), + std::make_tuple("min-partition-size", "abc"), + // enum + std::make_tuple("tune", "PsnR1"), + // out of range + std::make_tuple("cq-level", "1000"), +}; + +INSTANTIATE_TEST_SUITE_P(KeyValAPI, EncValidTest, + testing::ValuesIn(enc_valid_params)); + +INSTANTIATE_TEST_SUITE_P(KeyValAPI, EncInvalidTest, + testing::ValuesIn(enc_invalid_params)); +#endif // CONFIG_AV1_ENCODER + +} // namespace diff --git a/third_party/aom/test/av1_nn_predict_test.cc b/third_party/aom/test/av1_nn_predict_test.cc new file mode 100644 index 0000000000..4201ea6ce6 --- /dev/null +++ b/third_party/aom/test/av1_nn_predict_test.cc @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "aom/aom_integer.h" +#include "aom_ports/aom_timer.h" +#include "av1/encoder/ml.h" +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" +#include "config/av1_rtcd.h" +#include "test/util.h" +#include "test/register_state_check.h" +#include "test/acm_random.h" + +namespace { +typedef void (*NnPredict_Func)(const float *const input_nodes, + const NN_CONFIG *const nn_config, + int reduce_prec, float *const output); + +typedef std::tuple<const NnPredict_Func> NnPredictTestParam; + +const float epsilon = 1e-3f; // Error threshold for functional equivalence + +class NnPredictTest : public ::testing::TestWithParam<NnPredictTestParam> { + public: + void SetUp() override { + const int MAX_NODES2 = NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER; + // Allocate two massive buffers on the heap for edge weights and node bias + // Then set-up the double-dimension arrays pointing into the big buffers + weights_buf = (float *)aom_malloc(MAX_NODES2 * (NN_MAX_HIDDEN_LAYERS + 1) * + sizeof(*weights_buf)); + bias_buf = + (float *)aom_malloc(NN_MAX_NODES_PER_LAYER * + (NN_MAX_HIDDEN_LAYERS + 1) * sizeof(*bias_buf)); + ASSERT_NE(weights_buf, nullptr); + ASSERT_NE(bias_buf, nullptr); + for (int i = 0; i < NN_MAX_HIDDEN_LAYERS + 1; i++) { + weights[i] = &weights_buf[i * MAX_NODES2]; + bias[i] = &bias_buf[i * NN_MAX_NODES_PER_LAYER]; + } + target_func_ = GET_PARAM(0); + } + void TearDown() override { + aom_free(weights_buf); + aom_free(bias_buf); + } + void RunNnPredictTest(const NN_CONFIG *const shape); + void RunNnPredictSpeedTest(const NN_CONFIG *const shape, const int run_times); + void RunNnPredictTest_all(const NN_CONFIG *const shapes, + const int num_shapes); + void RunNnPredictSpeedTest_all(const NN_CONFIG *const shapes, + const int num_shapes, const int run_times); + + private: + NnPredict_Func target_func_; + libaom_test::ACMRandom rng_; + float 
*weights[NN_MAX_HIDDEN_LAYERS + 1] = {}; + float *bias[NN_MAX_HIDDEN_LAYERS + 1] = {}; + float *weights_buf = nullptr, *bias_buf = nullptr; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(NnPredictTest); + +void NnPredictTest::RunNnPredictTest(const NN_CONFIG *const shape) { + float inputs[NN_MAX_NODES_PER_LAYER] = { 0 }; + float outputs_test[NN_MAX_NODES_PER_LAYER] = { 0 }; + float outputs_ref[NN_MAX_NODES_PER_LAYER] = { 0 }; + + NN_CONFIG nn_config; + memcpy(&nn_config, shape, sizeof(nn_config)); + + char shape_str[32] = { 0 }; + snprintf(shape_str, sizeof(shape_str), "%d", shape->num_inputs); + for (int layer = 0; layer < shape->num_hidden_layers; layer++) + snprintf(&shape_str[strlen(shape_str)], + sizeof(shape_str) - strlen(shape_str), "x%d", + shape->num_hidden_nodes[layer]); + snprintf(&shape_str[strlen(shape_str)], sizeof(shape_str) - strlen(shape_str), + "x%d", shape->num_outputs); + + for (int i = 0; i < NN_MAX_HIDDEN_LAYERS + 1; i++) { + nn_config.weights[i] = weights[i]; + nn_config.bias[i] = bias[i]; + } + + for (int iter = 0; iter < 10000 && !HasFatalFailure(); ++iter) { + for (int node = 0; node < shape->num_inputs; node++) { + inputs[node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31); + } + for (int layer = 0; layer < shape->num_hidden_layers; layer++) { + for (int node = 0; node < NN_MAX_NODES_PER_LAYER; node++) { + bias[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31); + } + for (int node = 0; node < NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER; + node++) { + weights[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31); + } + } + // Now the outputs: + int layer = shape->num_hidden_layers; + for (int node = 0; node < NN_MAX_NODES_PER_LAYER; node++) { + bias[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31); + } + for (int node = 0; node < NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER; + node++) { + weights[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31); + } + + 
av1_nn_predict_c(inputs, &nn_config, 0, outputs_ref); + target_func_(inputs, &nn_config, 0, outputs_test); + + for (int node = 0; node < shape->num_outputs; node++) { + if (outputs_ref[node] < epsilon) { + ASSERT_LE(outputs_test[node], epsilon) + << "Reference output was near-zero, test output was not (" + << shape_str << ")"; + } else { + const float error = outputs_ref[node] - outputs_test[node]; + const float relative_error = fabsf(error / outputs_ref[node]); + ASSERT_LE(relative_error, epsilon) + << "Excessive relative error between reference and test (" + << shape_str << ")"; + } + } + } +} + +void NnPredictTest::RunNnPredictSpeedTest(const NN_CONFIG *const shape, + const int run_times) { + float inputs[NN_MAX_NODES_PER_LAYER] = { 0 }; + float outputs_test[NN_MAX_NODES_PER_LAYER] = { 0 }; + float outputs_ref[NN_MAX_NODES_PER_LAYER] = { 0 }; + + NN_CONFIG nn_config; + memcpy(&nn_config, shape, sizeof(nn_config)); + + for (int i = 0; i < NN_MAX_HIDDEN_LAYERS; i++) { + nn_config.weights[i] = weights[i]; + nn_config.bias[i] = bias[i]; + } + // Don't bother actually changing the values for inputs/weights/bias: it + // shouldn't make any difference for a speed test. 
+ + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + av1_nn_predict_c(inputs, &nn_config, 0, outputs_ref); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + target_func_(inputs, &nn_config, 0, outputs_test); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf("%d", shape->num_inputs); + for (int layer = 0; layer < shape->num_hidden_layers; layer++) + printf("x%d", shape->num_hidden_nodes[layer]); + printf("x%d: ", shape->num_outputs); + printf("%7.2f/%7.2fns (%3.2f)\n", time1, time2, time1 / time2); +} + +// This is all the neural network shapes observed executed in a few different +// runs of the encoder. It also conveniently covers all the kernels +// implemented. +static const NN_CONFIG kShapes[] = { + { 37, 1, 2, { 16, 24 }, {}, {} }, { 24, 24, 1, { 12 }, {}, {} }, + { 10, 16, 1, { 64 }, {}, {} }, { 12, 1, 1, { 12 }, {}, {} }, + { 12, 1, 1, { 24 }, {}, {} }, { 12, 1, 1, { 32 }, {}, {} }, + { 18, 4, 1, { 24 }, {}, {} }, { 18, 4, 1, { 32 }, {}, {} }, + { 4, 1, 1, { 16 }, {}, {} }, { 8, 1, 0, { 0 }, {}, {} }, + { 8, 4, 1, { 16 }, {}, {} }, { 8, 1, 1, { 32 }, {}, {} }, + { 9, 3, 1, { 32 }, {}, {} }, { 8, 4, 0, { 0 }, {}, {} }, + { 8, 8, 0, { 0 }, {}, {} }, { 4, 4, 1, { 8 }, {}, {} }, + { 4, 3, 0, { 64 }, {}, {} }, +}; + +void NnPredictTest::RunNnPredictTest_all(const NN_CONFIG *const shapes, + const int num_shapes) { + for (int i = 0; i < num_shapes; i++) RunNnPredictTest(&shapes[i]); +} + +void NnPredictTest::RunNnPredictSpeedTest_all(const NN_CONFIG *const shapes, + const int num_shapes, + const int run_times) { + for (int i = 0; i < num_shapes; i++) + NnPredictTest::RunNnPredictSpeedTest(&shapes[i], run_times); +} + +TEST_P(NnPredictTest, RandomValues) { + RunNnPredictTest_all(kShapes, sizeof(kShapes) / 
sizeof(kShapes[0])); +} + +TEST_P(NnPredictTest, DISABLED_Speed) { + RunNnPredictSpeedTest_all(kShapes, sizeof(kShapes) / sizeof(kShapes[0]), + 10000000); +} + +#if !CONFIG_EXCLUDE_SIMD_MISMATCH +#if HAVE_SSE3 +INSTANTIATE_TEST_SUITE_P(SSE3, NnPredictTest, + ::testing::Values(av1_nn_predict_sse3)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, NnPredictTest, + ::testing::Values(av1_nn_predict_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, NnPredictTest, + ::testing::Values(av1_nn_predict_neon)); +#endif +#endif // !CONFIG_EXCLUDE_SIMD_MISMATCH + +} // namespace diff --git a/third_party/aom/test/av1_quantize_test.cc b/third_party/aom/test/av1_quantize_test.cc new file mode 100644 index 0000000000..c8af14a356 --- /dev/null +++ b/third_party/aom/test/av1_quantize_test.cc @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ +#include <stdlib.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/av1_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "av1/common/scan.h" +#include "av1/encoder/av1_quantize.h" + +namespace { + +typedef void (*QuantizeFpFunc)( + const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan, int log_scale); + +struct QuantizeFuncParams { + QuantizeFuncParams(QuantizeFpFunc qF = nullptr, + QuantizeFpFunc qRefF = nullptr, int count = 16) + : qFunc(qF), qFuncRef(qRefF), coeffCount(count) {} + QuantizeFpFunc qFunc; + QuantizeFpFunc qFuncRef; + int coeffCount; +}; + +using libaom_test::ACMRandom; + +const int numTests = 1000; +const int maxSize = 1024; +const int roundFactorRange = 127; +const int dequantRange = 32768; +const int coeffRange = (1 << 20) - 1; + +class AV1QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> { + public: + void RunQuantizeTest() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]); + DECLARE_ALIGNED(16, int16_t, zbin_ptr[8]); + DECLARE_ALIGNED(16, int16_t, round_ptr[8]); + DECLARE_ALIGNED(16, int16_t, quant_ptr[8]); + DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[8]); + DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, int16_t, dequant_ptr[8]); + uint16_t eob; + uint16_t ref_eob; + int err_count_total = 0; + int first_failure = -1; + int count = params_.coeffCount; + const TX_SIZE txSize = getTxSize(count); + int 
log_scale = (txSize == TX_32X32); + QuantizeFpFunc quanFunc = params_.qFunc; + QuantizeFpFunc quanFuncRef = params_.qFuncRef; + + const SCAN_ORDER scanOrder = av1_scan_orders[txSize][DCT_DCT]; + for (int i = 0; i < numTests; i++) { + int err_count = 0; + ref_eob = eob = UINT16_MAX; + for (int j = 0; j < count; j++) { + coeff_ptr[j] = rnd(coeffRange); + } + + for (int j = 0; j < 2; j++) { + zbin_ptr[j] = rnd.Rand16Signed(); + quant_shift_ptr[j] = rnd.Rand16Signed(); + // int16_t positive + dequant_ptr[j] = abs(rnd(dequantRange)); + quant_ptr[j] = static_cast<int16_t>((1 << 16) / dequant_ptr[j]); + round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7; + } + for (int j = 2; j < 8; ++j) { + zbin_ptr[j] = zbin_ptr[1]; + quant_shift_ptr[j] = quant_shift_ptr[1]; + dequant_ptr[j] = dequant_ptr[1]; + quant_ptr[j] = quant_ptr[1]; + round_ptr[j] = round_ptr[1]; + } + quanFuncRef(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr, + quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr, + &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale); + + API_REGISTER_STATE_CHECK( + quanFunc(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr, + quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob, + scanOrder.scan, scanOrder.iscan, log_scale)); + + for (int j = 0; j < count; ++j) { + err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) | + (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]); + ASSERT_EQ(ref_qcoeff_ptr[j], qcoeff_ptr[j]) + << "qcoeff error: i = " << i << " j = " << j << "\n"; + EXPECT_EQ(ref_dqcoeff_ptr[j], dqcoeff_ptr[j]) + << "dqcoeff error: i = " << i << " j = " << j << "\n"; + } + EXPECT_EQ(ref_eob, eob) << "eob error: " + << "i = " << i << "\n"; + err_count += (ref_eob != eob); + if (err_count && !err_count_total) { + first_failure = i; + } + err_count_total += err_count; + } + EXPECT_EQ(0, err_count_total) + << "Error: Quantization Test, C output doesn't match SSE2 output. 
" + << "First failed at test case " << first_failure; + } + + void RunEobTest() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]); + DECLARE_ALIGNED(16, int16_t, zbin_ptr[8]); + DECLARE_ALIGNED(16, int16_t, round_ptr[8]); + DECLARE_ALIGNED(16, int16_t, quant_ptr[8]); + DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[8]); + DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]); + DECLARE_ALIGNED(16, int16_t, dequant_ptr[8]); + uint16_t eob; + uint16_t ref_eob; + int count = params_.coeffCount; + const TX_SIZE txSize = getTxSize(count); + int log_scale = (txSize == TX_32X32); + QuantizeFpFunc quanFunc = params_.qFunc; + QuantizeFpFunc quanFuncRef = params_.qFuncRef; + const SCAN_ORDER scanOrder = av1_scan_orders[txSize][DCT_DCT]; + + for (int i = 0; i < numTests; i++) { + ref_eob = eob = UINT16_MAX; + for (int j = 0; j < count; j++) { + coeff_ptr[j] = 0; + } + + coeff_ptr[rnd(count)] = rnd(coeffRange); + coeff_ptr[rnd(count)] = rnd(coeffRange); + coeff_ptr[rnd(count)] = rnd(coeffRange); + + for (int j = 0; j < 2; j++) { + zbin_ptr[j] = rnd.Rand16Signed(); + quant_shift_ptr[j] = rnd.Rand16Signed(); + // int16_t positive + dequant_ptr[j] = abs(rnd(dequantRange)); + quant_ptr[j] = (1 << 16) / dequant_ptr[j]; + round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7; + } + for (int j = 2; j < 8; ++j) { + zbin_ptr[j] = zbin_ptr[1]; + quant_shift_ptr[j] = quant_shift_ptr[1]; + dequant_ptr[j] = dequant_ptr[1]; + quant_ptr[j] = quant_ptr[1]; + round_ptr[j] = round_ptr[1]; + } + + quanFuncRef(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr, + quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr, + &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale); + + API_REGISTER_STATE_CHECK( + quanFunc(coeff_ptr, count, zbin_ptr, round_ptr, 
quant_ptr, + quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob, + scanOrder.scan, scanOrder.iscan, log_scale)); + EXPECT_EQ(ref_eob, eob) << "eob error: " + << "i = " << i << "\n"; + } + } + + void SetUp() override { params_ = GetParam(); } + + ~AV1QuantizeTest() override = default; + + private: + TX_SIZE getTxSize(int count) { + switch (count) { + case 16: return TX_4X4; + case 64: return TX_8X8; + case 256: return TX_16X16; + case 1024: return TX_32X32; + default: return TX_4X4; + } + } + + QuantizeFuncParams params_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1QuantizeTest); + +TEST_P(AV1QuantizeTest, BitExactCheck) { RunQuantizeTest(); } +TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); } + +TEST(AV1QuantizeTest, QuantizeFpNoQmatrix) { + // Here we use a uniform quantizer as an example + const int16_t dequant_ptr[2] = { 78, 93 }; // quantize step + const int16_t round_ptr[2] = { 39, 46 }; // round ~= dequant / 2 + + // quant ~= 2^16 / dequant. This is a 16-bit fixed point representation of the + // inverse of quantize step. 
+ const int16_t quant_ptr[2] = { 840, 704 }; + int log_scale = 0; + int coeff_count = 4; + const tran_low_t coeff_ptr[4] = { -449, 624, -14, 24 }; + const tran_low_t ref_qcoeff_ptr[4] = { -6, 7, 0, 0 }; + const tran_low_t ref_dqcoeff_ptr[4] = { -468, 651, 0, 0 }; + const int16_t scan[4] = { 0, 1, 2, 3 }; + tran_low_t qcoeff_ptr[4]; + tran_low_t dqcoeff_ptr[4]; + int eob = av1_quantize_fp_no_qmatrix(quant_ptr, dequant_ptr, round_ptr, + log_scale, scan, coeff_count, coeff_ptr, + qcoeff_ptr, dqcoeff_ptr); + EXPECT_EQ(eob, 2); + for (int i = 0; i < coeff_count; ++i) { + EXPECT_EQ(qcoeff_ptr[i], ref_qcoeff_ptr[i]); + EXPECT_EQ(dqcoeff_ptr[i], ref_dqcoeff_ptr[i]); + } +} + +#if HAVE_SSE4_1 +const QuantizeFuncParams qfps[4] = { + QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c, + 16), + QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c, + 64), + QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c, + 256), + QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c, + 1024), +}; + +INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1QuantizeTest, ::testing::ValuesIn(qfps)); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 +const QuantizeFuncParams qfps_avx2[4] = { + QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c, + 16), + QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c, + 64), + QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c, + 256), + QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c, + 1024), +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, AV1QuantizeTest, ::testing::ValuesIn(qfps_avx2)); +#endif // HAVE_AVX2 + +} // namespace diff --git a/third_party/aom/test/av1_round_shift_array_test.cc b/third_party/aom/test/av1_round_shift_array_test.cc new file mode 100644 index 0000000000..937e8645a5 --- /dev/null +++ b/third_party/aom/test/av1_round_shift_array_test.cc @@ -0,0 +1,131 @@ +/* + * Copyright (c) 
2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <tuple> + +#include "config/av1_rtcd.h" + +#include "aom_mem/aom_mem.h" +#include "aom_ports/aom_timer.h" +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "test/util.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace AV1CompRoundShift { + +typedef void (*comp_round_shift_array_func)(int32_t *arr, int size, int bit); + +#if HAVE_SSE4_1 || HAVE_NEON +const int kValidBitCheck[] = { + -4, -3, -2, -1, 0, 1, 2, 3, 4, +}; +#endif // HAVE_SSE4_1 || HAVE_NEON + +typedef std::tuple<comp_round_shift_array_func, BLOCK_SIZE, int> + CompRoundShiftParam; + +class AV1CompRoundShiftTest + : public ::testing::TestWithParam<CompRoundShiftParam> { + public: + ~AV1CompRoundShiftTest() override; + + void SetUp() override { + rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); + } + + protected: + void RunCheckOutput(comp_round_shift_array_func test_impl, BLOCK_SIZE bsize, + int bit); + void RunSpeedTest(comp_round_shift_array_func test_impl, BLOCK_SIZE bsize, + int bit); + + libaom_test::ACMRandom rnd_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompRoundShiftTest); + +AV1CompRoundShiftTest::~AV1CompRoundShiftTest() = default; + +void AV1CompRoundShiftTest::RunCheckOutput( + comp_round_shift_array_func test_impl, BLOCK_SIZE bsize, int bit) { + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int blk_wd 
= 64; + DECLARE_ALIGNED(32, int32_t, pred_[blk_wd]); + DECLARE_ALIGNED(32, int32_t, ref_buffer_[blk_wd]); + for (int i = 0; i < (blk_wd); ++i) { + ref_buffer_[i] = pred_[i] = rnd_.Rand31() / 16; + } + av1_round_shift_array_c(ref_buffer_, w, bit); + test_impl(pred_, w, bit); + for (int x = 0; x < w; ++x) { + ASSERT_EQ(ref_buffer_[x], pred_[x]) << w << "x" << h << "mismatch @" + << "(" << x << ")"; + } +} + +void AV1CompRoundShiftTest::RunSpeedTest(comp_round_shift_array_func test_impl, + BLOCK_SIZE bsize, int bit) { + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int blk_wd = 64; + DECLARE_ALIGNED(32, int32_t, ref_buffer_[blk_wd]); + for (int i = 0; i < (blk_wd); ++i) { + ref_buffer_[i] = rnd_.Rand31(); + } + + const int num_loops = 1000000000 / (w + h); + comp_round_shift_array_func funcs[2] = { av1_round_shift_array_c, test_impl }; + double elapsed_time[2] = { 0 }; + for (int i = 0; i < 2; ++i) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + comp_round_shift_array_func func = funcs[i]; + for (int j = 0; j < num_loops; ++j) { + func(ref_buffer_, w, bit); + } + aom_usec_timer_mark(&timer); + double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); + elapsed_time[i] = 1000.0 * time / num_loops; + } + printf("av1_round_shift_array %3dx%-3d: bit : %d %7.2f/%7.2fns", w, h, bit, + elapsed_time[0], elapsed_time[1]); + printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); +} + +TEST_P(AV1CompRoundShiftTest, CheckOutput) { + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)); +} + +TEST_P(AV1CompRoundShiftTest, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)); +} + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1CompRoundShiftTest, + ::testing::Combine(::testing::Values(&av1_round_shift_array_sse4_1), + ::testing::ValuesIn(txsize_to_bsize), + ::testing::ValuesIn(kValidBitCheck))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1CompRoundShiftTest, + 
::testing::Combine(::testing::Values(&av1_round_shift_array_neon), + ::testing::ValuesIn(txsize_to_bsize), + ::testing::ValuesIn(kValidBitCheck))); +#endif + +} // namespace AV1CompRoundShift diff --git a/third_party/aom/test/av1_softmax_test.cc b/third_party/aom/test/av1_softmax_test.cc new file mode 100644 index 0000000000..2b04af1342 --- /dev/null +++ b/third_party/aom/test/av1_softmax_test.cc @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <memory> +#include <new> +#include <tuple> + +#include "aom/aom_integer.h" +#include "aom_ports/aom_timer.h" +#include "av1/encoder/ml.h" +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" +#include "config/av1_rtcd.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { +using FastSoftmaxFn = void (*)(const float *const input, float *output); +using FastSoftmaxTestParams = std::tuple<const FastSoftmaxFn, int>; + +// Error thresholds for functional equivalence +constexpr float kRelEpsilon = 5e-2f; +constexpr float kAbsEpsilon = 5e-3f; + +class FastSoftmaxTest : public ::testing::TestWithParam<FastSoftmaxTestParams> { + public: + FastSoftmaxTest() : target_fn_(GET_PARAM(0)), num_classes_(GET_PARAM(1)) {} + void SetUp() override { + ref_buf_.reset(new (std::nothrow) float[num_classes_]()); + ASSERT_NE(ref_buf_, nullptr); + dst_buf_.reset(new (std::nothrow) 
float[num_classes_]()); + ASSERT_NE(dst_buf_, nullptr); + input_.reset(new (std::nothrow) float[num_classes_]()); + ASSERT_NE(input_, nullptr); + } + void RunSoftmaxTest(); + void RunSoftmaxSpeedTest(const int run_times); + void FillInputBuf(); + + private: + const FastSoftmaxFn target_fn_; + const int num_classes_; + std::unique_ptr<float[]> ref_buf_, dst_buf_, input_; + libaom_test::ACMRandom rng_; +}; + +void FastSoftmaxTest::FillInputBuf() { + for (int idx = 0; idx < num_classes_; idx++) { + input_[idx] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 30); + } +} + +void FastSoftmaxTest::RunSoftmaxTest() { + av1_nn_softmax(input_.get(), ref_buf_.get(), num_classes_); + target_fn_(input_.get(), dst_buf_.get()); + + for (int idx = 0; idx < num_classes_; idx++) { + if (ref_buf_[idx] < kAbsEpsilon) { + ASSERT_LE(dst_buf_[idx], kAbsEpsilon) + << "Reference output was near-zero, test output was not" << std::endl; + } else { + const float error = dst_buf_[idx] - ref_buf_[idx]; + const float relative_error = fabsf(error / ref_buf_[idx]); + ASSERT_LE(relative_error, kRelEpsilon) + << "Excessive relative error between reference and test output" + << std::endl; + ASSERT_LE(error, kAbsEpsilon) + << "Excessive absolute error between reference and test output" + << std::endl; + } + } +} + +void FastSoftmaxTest::RunSoftmaxSpeedTest(const int run_times) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int idx = 0; idx < run_times; idx++) { + target_fn_(input_.get(), dst_buf_.get()); + } + aom_usec_timer_mark(&timer); + const int64_t time = aom_usec_timer_elapsed(&timer); + std::cout << "Test with " << num_classes_ << " classes took " << time + << " us." 
<< std::endl; +} + +TEST_P(FastSoftmaxTest, RandomValues) { + FillInputBuf(); + RunSoftmaxTest(); +} + +TEST_P(FastSoftmaxTest, DISABLED_Speed) { + constexpr int kNumTimes = 1000000; + RunSoftmaxSpeedTest(kNumTimes); +} + +void AnchorSoftmax16Fn(const float *input, float *output) { + av1_nn_softmax(input, output, 16); +} + +const FastSoftmaxTestParams kArrayParams_c[] = { + FastSoftmaxTestParams(AnchorSoftmax16Fn, 16), + FastSoftmaxTestParams(av1_nn_fast_softmax_16_c, 16) +}; +INSTANTIATE_TEST_SUITE_P(C, FastSoftmaxTest, + ::testing::ValuesIn(kArrayParams_c)); + +#if HAVE_SSE3 && !CONFIG_EXCLUDE_SIMD_MISMATCH +INSTANTIATE_TEST_SUITE_P( + SSE3, FastSoftmaxTest, + ::testing::Values(FastSoftmaxTestParams(av1_nn_fast_softmax_16_sse3, 16))); +#endif +} // namespace diff --git a/third_party/aom/test/av1_temporal_denoiser_test.cc b/third_party/aom/test/av1_temporal_denoiser_test.cc new file mode 100644 index 0000000000..7aa8fb6a66 --- /dev/null +++ b/third_party/aom/test/av1_temporal_denoiser_test.cc @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <tuple> + +#include "config/av1_rtcd.h" + +#include "test/acm_random.h" +#include "test/util.h" +#include "test/register_state_check.h" + +#include "aom_scale/yv12config.h" +#include "aom/aom_integer.h" +#include "av1/common/reconinter.h" +#include "av1/encoder/context_tree.h" +#include "av1/encoder/av1_temporal_denoiser.h" + +using libaom_test::ACMRandom; + +namespace { + +const int kNumPixels = 128 * 128; + +typedef int (*Av1DenoiserFilterFunc)(const uint8_t *sig, int sig_stride, + const uint8_t *mc_avg, int mc_avg_stride, + uint8_t *avg, int avg_stride, + int increase_denoising, BLOCK_SIZE bs, + int motion_magnitude); +typedef std::tuple<Av1DenoiserFilterFunc, BLOCK_SIZE> AV1DenoiserTestParam; + +class AV1DenoiserTest + : public ::testing::Test, + public ::testing::WithParamInterface<AV1DenoiserTestParam> { + public: + ~AV1DenoiserTest() override = default; + + void SetUp() override { bs_ = GET_PARAM(1); } + + protected: + BLOCK_SIZE bs_; +}; + +TEST_P(AV1DenoiserTest, BitexactCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 4000; + + // Allocate the space for input and output, + // where sig_block is the block to be denoised, + // mc_avg_block is the denoised reference block, + // avg_block_c is the denoised result from C code, + // avg_block_sse2 is the denoised result from SSE2 code. + DECLARE_ALIGNED(16, uint8_t, sig_block[kNumPixels]); + DECLARE_ALIGNED(16, uint8_t, mc_avg_block[kNumPixels]); + DECLARE_ALIGNED(16, uint8_t, avg_block_c[kNumPixels]); + DECLARE_ALIGNED(16, uint8_t, avg_block_sse2[kNumPixels]); + + for (int i = 0; i < count_test_block; ++i) { + // Generate random motion magnitude, 20% of which exceed the threshold. + const int motion_magnitude_random = + rnd.Rand8() % static_cast<int>(MOTION_MAGNITUDE_THRESHOLD * 1.2); + + // Initialize a test block with random number in range [0, 255]. 
+ for (int j = 0; j < kNumPixels; ++j) { + int temp = 0; + sig_block[j] = rnd.Rand8(); + // The pixels in mc_avg_block are generated by adding a random + // number in range [-19, 19] to corresponding pixels in sig_block. + temp = + sig_block[j] + ((rnd.Rand8() % 2 == 0) ? -1 : 1) * (rnd.Rand8() % 20); + // Clip. + mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp); + } + + API_REGISTER_STATE_CHECK( + av1_denoiser_filter_c(sig_block, 128, mc_avg_block, 128, avg_block_c, + 128, 0, bs_, motion_magnitude_random)); + + API_REGISTER_STATE_CHECK(GET_PARAM(0)(sig_block, 128, mc_avg_block, 128, + avg_block_sse2, 128, 0, bs_, + motion_magnitude_random)); + + // Test bitexactness. + for (int h = 0; h < block_size_high[bs_]; ++h) { + for (int w = 0; w < block_size_wide[bs_]; ++w) { + EXPECT_EQ(avg_block_c[h * 128 + w], avg_block_sse2[h * 128 + w]); + } + } + } +} + +using std::make_tuple; + +// Test for all block size. +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, AV1DenoiserTest, + ::testing::Values(make_tuple(&av1_denoiser_filter_sse2, BLOCK_8X8), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_8X16), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_16X8), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_16X16), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_16X32), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_32X16), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_32X32), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_32X64), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_64X32), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_64X64), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_128X64), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_64X128), + make_tuple(&av1_denoiser_filter_sse2, BLOCK_128X128))); +#endif // HAVE_SSE2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1DenoiserTest, + ::testing::Values(make_tuple(&av1_denoiser_filter_neon, BLOCK_8X8), + make_tuple(&av1_denoiser_filter_neon, BLOCK_8X16), + make_tuple(&av1_denoiser_filter_neon, BLOCK_16X8), + 
make_tuple(&av1_denoiser_filter_neon, BLOCK_16X16), + make_tuple(&av1_denoiser_filter_neon, BLOCK_16X32), + make_tuple(&av1_denoiser_filter_neon, BLOCK_32X16), + make_tuple(&av1_denoiser_filter_neon, BLOCK_32X32), + make_tuple(&av1_denoiser_filter_neon, BLOCK_32X64), + make_tuple(&av1_denoiser_filter_neon, BLOCK_64X32), + make_tuple(&av1_denoiser_filter_neon, BLOCK_64X64), + make_tuple(&av1_denoiser_filter_neon, BLOCK_128X64), + make_tuple(&av1_denoiser_filter_neon, BLOCK_64X128), + make_tuple(&av1_denoiser_filter_neon, BLOCK_128X128))); +#endif +} // namespace diff --git a/third_party/aom/test/av1_txfm_test.cc b/third_party/aom/test/av1_txfm_test.cc new file mode 100644 index 0000000000..77c0ec1071 --- /dev/null +++ b/third_party/aom/test/av1_txfm_test.cc @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "test/av1_txfm_test.h" + +#include <stdio.h> + +#include <memory> +#include <new> + +namespace libaom_test { + +const char *tx_type_name[] = { + "DCT_DCT", + "ADST_DCT", + "DCT_ADST", + "ADST_ADST", + "FLIPADST_DCT", + "DCT_FLIPADST", + "FLIPADST_FLIPADST", + "ADST_FLIPADST", + "FLIPADST_ADST", + "IDTX", + "V_DCT", + "H_DCT", + "V_ADST", + "H_ADST", + "V_FLIPADST", + "H_FLIPADST", +}; + +int get_txfm1d_size(TX_SIZE tx_size) { return tx_size_wide[tx_size]; } + +void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1) { + switch (txfm2d_type) { + case DCT_DCT: + *type0 = TYPE_DCT; + *type1 = TYPE_DCT; + break; + case ADST_DCT: + *type0 = TYPE_ADST; + *type1 = TYPE_DCT; + break; + case DCT_ADST: + *type0 = TYPE_DCT; + *type1 = TYPE_ADST; + break; + case ADST_ADST: + *type0 = TYPE_ADST; + *type1 = TYPE_ADST; + break; + case FLIPADST_DCT: + *type0 = TYPE_ADST; + *type1 = TYPE_DCT; + break; + case DCT_FLIPADST: + *type0 = TYPE_DCT; + *type1 = TYPE_ADST; + break; + case FLIPADST_FLIPADST: + *type0 = TYPE_ADST; + *type1 = TYPE_ADST; + break; + case ADST_FLIPADST: + *type0 = TYPE_ADST; + *type1 = TYPE_ADST; + break; + case FLIPADST_ADST: + *type0 = TYPE_ADST; + *type1 = TYPE_ADST; + break; + case IDTX: + *type0 = TYPE_IDTX; + *type1 = TYPE_IDTX; + break; + case H_DCT: + *type0 = TYPE_IDTX; + *type1 = TYPE_DCT; + break; + case V_DCT: + *type0 = TYPE_DCT; + *type1 = TYPE_IDTX; + break; + case H_ADST: + *type0 = TYPE_IDTX; + *type1 = TYPE_ADST; + break; + case V_ADST: + *type0 = TYPE_ADST; + *type1 = TYPE_IDTX; + break; + case H_FLIPADST: + *type0 = TYPE_IDTX; + *type1 = TYPE_ADST; + break; + case V_FLIPADST: + *type0 = TYPE_ADST; + *type1 = TYPE_IDTX; + break; + default: + *type0 = TYPE_DCT; + *type1 = TYPE_DCT; + assert(0); + break; + } +} + +double Sqrt2 = pow(2, 0.5); +double invSqrt2 = 1 / pow(2, 0.5); + +double dct_matrix(double n, double k, int size) { + return cos(PI * (2 * n + 1) * k / (2 * size)); +} + +void 
reference_dct_1d(const double *in, double *out, int size) { + for (int k = 0; k < size; ++k) { + out[k] = 0; + for (int n = 0; n < size; ++n) { + out[k] += in[n] * dct_matrix(n, k, size); + } + if (k == 0) out[k] = out[k] * invSqrt2; + } +} + +void reference_idct_1d(const double *in, double *out, int size) { + for (int k = 0; k < size; ++k) { + out[k] = 0; + for (int n = 0; n < size; ++n) { + if (n == 0) + out[k] += invSqrt2 * in[n] * dct_matrix(k, n, size); + else + out[k] += in[n] * dct_matrix(k, n, size); + } + } +} + +// TODO(any): Copied from the old 'fadst4' (same as the new 'av1_fadst4' +// function). Should be replaced by a proper reference function that takes +// 'double' input & output. +static void fadst4_new(const tran_low_t *input, tran_low_t *output) { + tran_high_t x0, x1, x2, x3; + tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; + + x0 = input[0]; + x1 = input[1]; + x2 = input[2]; + x3 = input[3]; + + if (!(x0 | x1 | x2 | x3)) { + output[0] = output[1] = output[2] = output[3] = 0; + return; + } + + s0 = sinpi_1_9 * x0; + s1 = sinpi_4_9 * x0; + s2 = sinpi_2_9 * x1; + s3 = sinpi_1_9 * x1; + s4 = sinpi_3_9 * x2; + s5 = sinpi_4_9 * x3; + s6 = sinpi_2_9 * x3; + s7 = x0 + x1 - x3; + + x0 = s0 + s2 + s5; + x1 = sinpi_3_9 * s7; + x2 = s1 - s3 + s6; + x3 = s4; + + s0 = x0 + x3; + s1 = x1; + s2 = x2 - x3; + s3 = x2 - x0 + x3; + + // 1-D transform scaling factor is sqrt(2). + output[0] = (tran_low_t)fdct_round_shift(s0); + output[1] = (tran_low_t)fdct_round_shift(s1); + output[2] = (tran_low_t)fdct_round_shift(s2); + output[3] = (tran_low_t)fdct_round_shift(s3); +} + +void reference_adst_1d(const double *in, double *out, int size) { + if (size == 4) { // Special case. 
+ tran_low_t int_input[4]; + for (int i = 0; i < 4; ++i) { + int_input[i] = static_cast<tran_low_t>(round(in[i])); + } + tran_low_t int_output[4]; + fadst4_new(int_input, int_output); + for (int i = 0; i < 4; ++i) { + out[i] = int_output[i]; + } + return; + } + + for (int k = 0; k < size; ++k) { + out[k] = 0; + for (int n = 0; n < size; ++n) { + out[k] += in[n] * sin(PI * (2 * n + 1) * (2 * k + 1) / (4 * size)); + } + } +} + +void reference_idtx_1d(const double *in, double *out, int size) { + double scale = 0; + if (size == 4) + scale = Sqrt2; + else if (size == 8) + scale = 2; + else if (size == 16) + scale = 2 * Sqrt2; + else if (size == 32) + scale = 4; + else if (size == 64) + scale = 4 * Sqrt2; + for (int k = 0; k < size; ++k) { + out[k] = in[k] * scale; + } +} + +void reference_hybrid_1d(double *in, double *out, int size, int type) { + if (type == TYPE_DCT) + reference_dct_1d(in, out, size); + else if (type == TYPE_ADST) + reference_adst_1d(in, out, size); + else + reference_idtx_1d(in, out, size); +} + +double get_amplification_factor(TX_TYPE tx_type, TX_SIZE tx_size) { + TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg; + av1_get_fwd_txfm_cfg(tx_type, tx_size, &fwd_txfm_flip_cfg); + const int tx_width = tx_size_wide[fwd_txfm_flip_cfg.tx_size]; + const int tx_height = tx_size_high[fwd_txfm_flip_cfg.tx_size]; + const int8_t *shift = fwd_txfm_flip_cfg.shift; + const int amplify_bit = shift[0] + shift[1] + shift[2]; + double amplify_factor = + amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit)); + + // For rectangular transforms, we need to multiply by an extra factor. + const int rect_type = get_rect_tx_log_ratio(tx_width, tx_height); + if (abs(rect_type) == 1) { + amplify_factor *= pow(2, 0.5); + } + return amplify_factor; +} + +void reference_hybrid_2d(double *in, double *out, TX_TYPE tx_type, + TX_SIZE tx_size) { + // Get transform type and size of each dimension. 
+ TYPE_TXFM type0; + TYPE_TXFM type1; + get_txfm1d_type(tx_type, &type0, &type1); + const int tx_width = tx_size_wide[tx_size]; + const int tx_height = tx_size_high[tx_size]; + + std::unique_ptr<double[]> temp_in( + new (std::nothrow) double[AOMMAX(tx_width, tx_height)]); + std::unique_ptr<double[]> temp_out( + new (std::nothrow) double[AOMMAX(tx_width, tx_height)]); + std::unique_ptr<double[]> out_interm( + new (std::nothrow) double[tx_width * tx_height]); + ASSERT_NE(temp_in, nullptr); + ASSERT_NE(temp_out, nullptr); + ASSERT_NE(out_interm, nullptr); + + // Transform columns. + for (int c = 0; c < tx_width; ++c) { + for (int r = 0; r < tx_height; ++r) { + temp_in[r] = in[r * tx_width + c]; + } + reference_hybrid_1d(temp_in.get(), temp_out.get(), tx_height, type0); + for (int r = 0; r < tx_height; ++r) { + out_interm[r * tx_width + c] = temp_out[r]; + } + } + + // Transform rows. + for (int r = 0; r < tx_height; ++r) { + reference_hybrid_1d(out_interm.get() + r * tx_width, temp_out.get(), + tx_width, type1); + for (int c = 0; c < tx_width; ++c) { + out[c * tx_height + r] = temp_out[c]; + } + } + + // These transforms use an approximate 2D DCT transform, by only keeping the + // top-left quarter of the coefficients, and repacking them in the first + // quarter indices. + // TODO(urvang): Refactor this code. + if (tx_width == 64 && tx_height == 64) { // tx_size == TX_64X64 + // Zero out top-right 32x32 area. + for (int col = 0; col < 32; ++col) { + memset(out + col * 64 + 32, 0, 32 * sizeof(*out)); + } + // Zero out the bottom 64x32 area. + memset(out + 32 * 64, 0, 32 * 64 * sizeof(*out)); + // Re-pack non-zero coeffs in the first 32x32 indices. + for (int col = 1; col < 32; ++col) { + memcpy(out + col * 32, out + col * 64, 32 * sizeof(*out)); + } + } else if (tx_width == 32 && tx_height == 64) { // tx_size == TX_32X64 + // Zero out right 32x32 area. 
for (int col = 0; col < 32; ++col) {
      memset(out + col * 64 + 32, 0, 32 * sizeof(*out));
    }
    // Re-pack non-zero coeffs in the first 32x32 indices.
    // Group 0 is already in place (source and destination coincide), so the
    // copy starts at col = 1.
    for (int col = 1; col < 32; ++col) {
      memcpy(out + col * 32, out + col * 64, 32 * sizeof(*out));
    }
  } else if (tx_width == 64 && tx_height == 32) {  // tx_size == TX_64X32
    // Zero out the bottom 32x32 area.
    memset(out + 32 * 32, 0, 32 * 32 * sizeof(*out));
    // Note: no repacking needed here.
  } else if (tx_width == 16 && tx_height == 64) {  // tx_size == TX_16X64
    // Zero out right 32x16 area.
    for (int col = 0; col < 16; ++col) {
      memset(out + col * 64 + 32, 0, 32 * sizeof(*out));
    }
    // Re-pack non-zero coeffs in the first 32x16 indices.
    // As above, group 0 needs no move, so the copy starts at col = 1.
    for (int col = 1; col < 16; ++col) {
      memcpy(out + col * 32, out + col * 64, 32 * sizeof(*out));
    }
  } else if (tx_width == 64 && tx_height == 16) {  // tx_size == TX_64X16
    // Zero out the bottom 16x32 area.
    memset(out + 16 * 32, 0, 16 * 32 * sizeof(*out));
    // Note: no repacking needed here.
  }

  // Apply appropriate scale.
+ const double amplify_factor = get_amplification_factor(tx_type, tx_size); + for (int c = 0; c < tx_width; ++c) { + for (int r = 0; r < tx_height; ++r) { + out[c * tx_height + r] *= amplify_factor; + } + } +} + +template <typename Type> +void fliplr(Type *dest, int width, int height, int stride) { + for (int r = 0; r < height; ++r) { + for (int c = 0; c < width / 2; ++c) { + const Type tmp = dest[r * stride + c]; + dest[r * stride + c] = dest[r * stride + width - 1 - c]; + dest[r * stride + width - 1 - c] = tmp; + } + } +} + +template <typename Type> +void flipud(Type *dest, int width, int height, int stride) { + for (int c = 0; c < width; ++c) { + for (int r = 0; r < height / 2; ++r) { + const Type tmp = dest[r * stride + c]; + dest[r * stride + c] = dest[(height - 1 - r) * stride + c]; + dest[(height - 1 - r) * stride + c] = tmp; + } + } +} + +template <typename Type> +void fliplrud(Type *dest, int width, int height, int stride) { + for (int r = 0; r < height / 2; ++r) { + for (int c = 0; c < width; ++c) { + const Type tmp = dest[r * stride + c]; + dest[r * stride + c] = dest[(height - 1 - r) * stride + width - 1 - c]; + dest[(height - 1 - r) * stride + width - 1 - c] = tmp; + } + } +} + +template void fliplr<double>(double *dest, int width, int height, int stride); +template void flipud<double>(double *dest, int width, int height, int stride); +template void fliplrud<double>(double *dest, int width, int height, int stride); + +int bd_arr[BD_NUM] = { 8, 10, 12 }; + +int8_t low_range_arr[BD_NUM] = { 18, 32, 32 }; +int8_t high_range_arr[BD_NUM] = { 32, 32, 32 }; + +void txfm_stage_range_check(const int8_t *stage_range, int stage_num, + int8_t cos_bit, int low_range, int high_range) { + for (int i = 0; i < stage_num; ++i) { + EXPECT_LE(stage_range[i], low_range); + ASSERT_LE(stage_range[i] + cos_bit, high_range) << "stage = " << i; + } + for (int i = 0; i < stage_num - 1; ++i) { + // make sure there is no overflow while doing half_btf() + ASSERT_LE(stage_range[i + 
1] + cos_bit, high_range) << "stage = " << i; + } +} +} // namespace libaom_test diff --git a/third_party/aom/test/av1_txfm_test.h b/third_party/aom/test/av1_txfm_test.h new file mode 100644 index 0000000000..d285e3d637 --- /dev/null +++ b/third_party/aom/test/av1_txfm_test.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AOM_TEST_AV1_TXFM_TEST_H_ +#define AOM_TEST_AV1_TXFM_TEST_H_ + +#include <stdio.h> +#include <stdlib.h> +#ifdef _MSC_VER +#define _USE_MATH_DEFINES +#endif +#include <math.h> + +#include "config/av1_rtcd.h" + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "av1/common/av1_txfm.h" +#include "av1/common/blockd.h" +#include "av1/common/enums.h" + +namespace libaom_test { + +extern const char *tx_type_name[]; + +enum { + TYPE_DCT = 0, + TYPE_ADST, + TYPE_IDTX, + TYPE_IDCT, + TYPE_IADST, + TYPE_LAST +} UENUM1BYTE(TYPE_TXFM); + +int get_txfm1d_size(TX_SIZE tx_size); + +void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1); + +void reference_dct_1d(const double *in, double *out, int size); +void reference_idct_1d(const double *in, double *out, int size); + +void reference_adst_1d(const double *in, double *out, int size); + +void reference_hybrid_1d(double *in, double *out, int size, int type); + +double get_amplification_factor(TX_TYPE tx_type, TX_SIZE tx_size); + +void reference_hybrid_2d(double *in, double *out, TX_TYPE tx_type, + TX_SIZE tx_size); 
+template <typename Type1, typename Type2> +static double compute_avg_abs_error(const Type1 *a, const Type2 *b, + const int size) { + double error = 0; + for (int i = 0; i < size; i++) { + error += fabs(static_cast<double>(a[i]) - static_cast<double>(b[i])); + } + error = error / size; + return error; +} + +template <typename Type> +void fliplr(Type *dest, int width, int height, int stride); + +template <typename Type> +void flipud(Type *dest, int width, int height, int stride); + +template <typename Type> +void fliplrud(Type *dest, int width, int height, int stride); + +typedef void (*TxfmFunc)(const int32_t *in, int32_t *out, const int8_t cos_bit, + const int8_t *range_bit); + +typedef void (*InvTxfm2dFunc)(const int32_t *, uint16_t *, int, TX_TYPE, int); +typedef void (*LbdInvTxfm2dFunc)(const int32_t *, uint8_t *, int, TX_TYPE, + TX_SIZE, int); + +static const int bd = 10; +static const int input_base = (1 << bd); + +static INLINE bool IsTxSizeTypeValid(TX_SIZE tx_size, TX_TYPE tx_type) { + const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size]; + TxSetType tx_set_type; + if (tx_size_sqr_up > TX_32X32) { + tx_set_type = EXT_TX_SET_DCTONLY; + } else if (tx_size_sqr_up == TX_32X32) { + tx_set_type = EXT_TX_SET_DCT_IDTX; + } else { + tx_set_type = EXT_TX_SET_ALL16; + } + return av1_ext_tx_used[tx_set_type][tx_type] != 0; +} + +#if CONFIG_AV1_ENCODER +#if !CONFIG_REALTIME_ONLY +static const FwdTxfm2dFunc fwd_txfm_func_ls[TX_SIZES_ALL] = { + av1_fwd_txfm2d_4x4_c, av1_fwd_txfm2d_8x8_c, av1_fwd_txfm2d_16x16_c, + av1_fwd_txfm2d_32x32_c, av1_fwd_txfm2d_64x64_c, av1_fwd_txfm2d_4x8_c, + av1_fwd_txfm2d_8x4_c, av1_fwd_txfm2d_8x16_c, av1_fwd_txfm2d_16x8_c, + av1_fwd_txfm2d_16x32_c, av1_fwd_txfm2d_32x16_c, av1_fwd_txfm2d_32x64_c, + av1_fwd_txfm2d_64x32_c, av1_fwd_txfm2d_4x16_c, av1_fwd_txfm2d_16x4_c, + av1_fwd_txfm2d_8x32_c, av1_fwd_txfm2d_32x8_c, av1_fwd_txfm2d_16x64_c, + av1_fwd_txfm2d_64x16_c, +}; +#else +static const FwdTxfm2dFunc fwd_txfm_func_ls[TX_SIZES_ALL] = { + 
av1_fwd_txfm2d_4x4_c, + av1_fwd_txfm2d_8x8_c, + av1_fwd_txfm2d_16x16_c, + av1_fwd_txfm2d_32x32_c, + av1_fwd_txfm2d_64x64_c, + av1_fwd_txfm2d_4x8_c, + av1_fwd_txfm2d_8x4_c, + av1_fwd_txfm2d_8x16_c, + av1_fwd_txfm2d_16x8_c, + av1_fwd_txfm2d_16x32_c, + av1_fwd_txfm2d_32x16_c, + av1_fwd_txfm2d_32x64_c, + av1_fwd_txfm2d_64x32_c, + nullptr, + av1_fwd_txfm2d_16x4_c, + nullptr, + nullptr, + nullptr, + nullptr, +}; +#endif +#endif + +static const InvTxfm2dFunc inv_txfm_func_ls[TX_SIZES_ALL] = { + av1_inv_txfm2d_add_4x4_c, av1_inv_txfm2d_add_8x8_c, + av1_inv_txfm2d_add_16x16_c, av1_inv_txfm2d_add_32x32_c, + av1_inv_txfm2d_add_64x64_c, av1_inv_txfm2d_add_4x8_c, + av1_inv_txfm2d_add_8x4_c, av1_inv_txfm2d_add_8x16_c, + av1_inv_txfm2d_add_16x8_c, av1_inv_txfm2d_add_16x32_c, + av1_inv_txfm2d_add_32x16_c, av1_inv_txfm2d_add_32x64_c, + av1_inv_txfm2d_add_64x32_c, av1_inv_txfm2d_add_4x16_c, + av1_inv_txfm2d_add_16x4_c, av1_inv_txfm2d_add_8x32_c, + av1_inv_txfm2d_add_32x8_c, av1_inv_txfm2d_add_16x64_c, + av1_inv_txfm2d_add_64x16_c, +}; + +#define BD_NUM 3 + +extern int bd_arr[]; +extern int8_t low_range_arr[]; +extern int8_t high_range_arr[]; + +void txfm_stage_range_check(const int8_t *stage_range, int stage_num, + const int8_t cos_bit, int low_range, + int high_range); +} // namespace libaom_test +#endif // AOM_TEST_AV1_TXFM_TEST_H_ diff --git a/third_party/aom/test/av1_wedge_utils_test.cc b/third_party/aom/test/av1_wedge_utils_test.cc new file mode 100644 index 0000000000..1055ff35b2 --- /dev/null +++ b/third_party/aom/test/av1_wedge_utils_test.cc @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" +#include "config/av1_rtcd.h" + +#include "aom_dsp/aom_dsp_common.h" + +#include "av1/common/enums.h" + +#include "test/acm_random.h" +#include "test/function_equivalence_test.h" +#include "test/register_state_check.h" + +#define WEDGE_WEIGHT_BITS 6 +#define MAX_MASK_VALUE (1 << (WEDGE_WEIGHT_BITS)) + +using libaom_test::ACMRandom; +using libaom_test::FunctionEquivalenceTest; + +namespace { + +static const int16_t kInt13Max = (1 << 12) - 1; + +////////////////////////////////////////////////////////////////////////////// +// av1_wedge_sse_from_residuals - functionality +////////////////////////////////////////////////////////////////////////////// + +class WedgeUtilsSSEFuncTest : public testing::Test { + protected: + WedgeUtilsSSEFuncTest() : rng_(ACMRandom::DeterministicSeed()) {} + + static const int kIterations = 1000; + + ACMRandom rng_; +}; + +static void equiv_blend_residuals(int16_t *r, const int16_t *r0, + const int16_t *r1, const uint8_t *m, int N) { + for (int i = 0; i < N; i++) { + const int32_t m0 = m[i]; + const int32_t m1 = MAX_MASK_VALUE - m0; + const int16_t R = m0 * r0[i] + m1 * r1[i]; + // Note that this rounding is designed to match the result + // you would get when actually blending the 2 predictors and computing + // the residuals. 
+ r[i] = ROUND_POWER_OF_TWO(R - 1, WEDGE_WEIGHT_BITS); + } +} + +static uint64_t equiv_sse_from_residuals(const int16_t *r0, const int16_t *r1, + const uint8_t *m, int N) { + uint64_t acc = 0; + for (int i = 0; i < N; i++) { + const int32_t m0 = m[i]; + const int32_t m1 = MAX_MASK_VALUE - m0; + const int16_t R = m0 * r0[i] + m1 * r1[i]; + const int32_t r = ROUND_POWER_OF_TWO(R - 1, WEDGE_WEIGHT_BITS); + acc += r * r; + } + return acc; +} + +TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingEquiv) { + DECLARE_ALIGNED(32, uint8_t, s[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, p0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, p1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, p[MAX_SB_SQUARE]); + + DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r_ref[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r_tst[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + s[i] = rng_.Rand8(); + m[i] = rng_(MAX_MASK_VALUE + 1); + } + + const int w = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3); + const int h = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3); + const int N = w * h; + + for (int j = 0; j < N; j++) { + p0[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX); + p1[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX); + } + + aom_blend_a64_mask(p, w, p0, w, p1, w, m, w, w, h, 0, 0); + + aom_subtract_block(h, w, r0, w, s, w, p0, w); + aom_subtract_block(h, w, r1, w, s, w, p1, w); + + aom_subtract_block(h, w, r_ref, w, s, w, p, w); + equiv_blend_residuals(r_tst, r0, r1, m, N); + + for (int i = 0; i < N; ++i) ASSERT_EQ(r_ref[i], r_tst[i]); + + uint64_t ref_sse = aom_sum_squares_i16(r_ref, N); + uint64_t tst_sse = equiv_sse_from_residuals(r0, r1, m, N); + + ASSERT_EQ(ref_sse, tst_sse); + } +} + +static uint64_t sse_from_residuals(const int16_t *r0, const int16_t *r1, + const uint8_t 
*m, int N) { + uint64_t acc = 0; + for (int i = 0; i < N; i++) { + const int32_t m0 = m[i]; + const int32_t m1 = MAX_MASK_VALUE - m0; + const int32_t r = m0 * r0[i] + m1 * r1[i]; + acc += r * r; + } + return ROUND_POWER_OF_TWO(acc, 2 * WEDGE_WEIGHT_BITS); +} + +TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingMethod) { + DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r1[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN; + d[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN; + m[i] = rng_(MAX_MASK_VALUE + 1); + } + + const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); + + for (int i = 0; i < N; i++) r0[i] = r1[i] + d[i]; + + const uint64_t ref_res = sse_from_residuals(r0, r1, m, N); + const uint64_t tst_res = av1_wedge_sse_from_residuals(r1, d, m, N); + + ASSERT_EQ(ref_res, tst_res); + } +} + +////////////////////////////////////////////////////////////////////////////// +// av1_wedge_sse_from_residuals - optimizations +////////////////////////////////////////////////////////////////////////////// + +typedef uint64_t (*FSSE)(const int16_t *r1, const int16_t *d, const uint8_t *m, + int N); +typedef libaom_test::FuncParam<FSSE> TestFuncsFSSE; + +class WedgeUtilsSSEOptTest : public FunctionEquivalenceTest<FSSE> { + protected: + static const int kIterations = 10000; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(WedgeUtilsSSEOptTest); + +TEST_P(WedgeUtilsSSEOptTest, RandomValues) { + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r1[i] = rng_(2 * kInt13Max + 1) - 
kInt13Max; + d[i] = rng_(2 * kInt13Max + 1) - kInt13Max; + m[i] = rng_(MAX_MASK_VALUE + 1); + } + + const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); + + const uint64_t ref_res = params_.ref_func(r1, d, m, N); + uint64_t tst_res; + API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +TEST_P(WedgeUtilsSSEOptTest, ExtremeValues) { + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + if (rng_(2)) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = kInt13Max; + } else { + for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = -kInt13Max; + } + + if (rng_(2)) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = kInt13Max; + } else { + for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = -kInt13Max; + } + + for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE; + + const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); + + const uint64_t ref_res = params_.ref_func(r1, d, m, N); + uint64_t tst_res; + API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +////////////////////////////////////////////////////////////////////////////// +// av1_wedge_sign_from_residuals +////////////////////////////////////////////////////////////////////////////// + +typedef int8_t (*FSign)(const int16_t *ds, const uint8_t *m, int N, + int64_t limit); +typedef libaom_test::FuncParam<FSign> TestFuncsFSign; + +class WedgeUtilsSignOptTest : public FunctionEquivalenceTest<FSign> { + protected: + static const int kIterations = 10000; + static const int kMaxSize = 8196; // Size limited by SIMD implementation. 
+}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(WedgeUtilsSignOptTest); + +TEST_P(WedgeUtilsSignOptTest, RandomValues) { + DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r0[i] = rng_(2 * kInt13Max + 1) - kInt13Max; + r1[i] = rng_(2 * kInt13Max + 1) - kInt13Max; + m[i] = rng_(MAX_MASK_VALUE + 1); + } + + const int maxN = AOMMIN(kMaxSize, MAX_SB_SQUARE); + const int N = 64 * (rng_(maxN / 64 - 1) + 1); + + int64_t limit; + limit = (int64_t)aom_sum_squares_i16(r0, N); + limit -= (int64_t)aom_sum_squares_i16(r1, N); + limit *= (1 << WEDGE_WEIGHT_BITS) / 2; + + for (int i = 0; i < N; i++) + ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX); + + const int ref_res = params_.ref_func(ds, m, N, limit); + int tst_res; + API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +TEST_P(WedgeUtilsSignOptTest, ExtremeValues) { + DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + switch (rng_(4)) { + case 0: + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r0[i] = 0; + r1[i] = kInt13Max; + } + break; + case 1: + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r0[i] = kInt13Max; + r1[i] = 0; + } + break; + case 2: + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r0[i] = 0; + r1[i] = -kInt13Max; + } + break; + default: + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + r0[i] = -kInt13Max; + r1[i] = 0; + } + break; + } + + for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE; + + const int maxN = AOMMIN(kMaxSize, 
MAX_SB_SQUARE); + const int N = 64 * (rng_(maxN / 64 - 1) + 1); + + int64_t limit; + limit = (int64_t)aom_sum_squares_i16(r0, N); + limit -= (int64_t)aom_sum_squares_i16(r1, N); + limit *= (1 << WEDGE_WEIGHT_BITS) / 2; + + for (int i = 0; i < N; i++) + ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX); + + const int ref_res = params_.ref_func(ds, m, N, limit); + int tst_res; + API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +////////////////////////////////////////////////////////////////////////////// +// av1_wedge_compute_delta_squares +////////////////////////////////////////////////////////////////////////////// + +typedef void (*FDS)(int16_t *d, const int16_t *a, const int16_t *b, int N); +typedef libaom_test::FuncParam<FDS> TestFuncsFDS; + +class WedgeUtilsDeltaSquaresOptTest : public FunctionEquivalenceTest<FDS> { + protected: + static const int kIterations = 10000; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(WedgeUtilsDeltaSquaresOptTest); + +TEST_P(WedgeUtilsDeltaSquaresOptTest, RandomValues) { + DECLARE_ALIGNED(32, int16_t, a[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, b[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, d_ref[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int16_t, d_tst[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + a[i] = rng_.Rand16Signed(); + b[i] = rng_(2 * INT16_MAX + 1) - INT16_MAX; + } + + const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); + + memset(&d_ref, INT16_MAX, sizeof(d_ref)); + memset(&d_tst, INT16_MAX, sizeof(d_tst)); + + params_.ref_func(d_ref, a, b, N); + API_REGISTER_STATE_CHECK(params_.tst_func(d_tst, a, b, N)); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) ASSERT_EQ(d_ref[i], d_tst[i]); + } +} + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, WedgeUtilsSSEOptTest, + ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_c, + 
av1_wedge_sse_from_residuals_sse2))); + +INSTANTIATE_TEST_SUITE_P( + SSE2, WedgeUtilsSignOptTest, + ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_c, + av1_wedge_sign_from_residuals_sse2))); + +INSTANTIATE_TEST_SUITE_P( + SSE2, WedgeUtilsDeltaSquaresOptTest, + ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_c, + av1_wedge_compute_delta_squares_sse2))); +#endif // HAVE_SSE2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, WedgeUtilsSSEOptTest, + ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_c, + av1_wedge_sse_from_residuals_neon))); + +INSTANTIATE_TEST_SUITE_P( + NEON, WedgeUtilsSignOptTest, + ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_c, + av1_wedge_sign_from_residuals_neon))); + +INSTANTIATE_TEST_SUITE_P( + NEON, WedgeUtilsDeltaSquaresOptTest, + ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_c, + av1_wedge_compute_delta_squares_neon))); +#endif // HAVE_NEON + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, WedgeUtilsSSEOptTest, + ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_sse2, + av1_wedge_sse_from_residuals_avx2))); + +INSTANTIATE_TEST_SUITE_P( + AVX2, WedgeUtilsSignOptTest, + ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_sse2, + av1_wedge_sign_from_residuals_avx2))); + +INSTANTIATE_TEST_SUITE_P( + AVX2, WedgeUtilsDeltaSquaresOptTest, + ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_sse2, + av1_wedge_compute_delta_squares_avx2))); +#endif // HAVE_AVX2 + +} // namespace diff --git a/third_party/aom/test/avg_test.cc b/third_party/aom/test/avg_test.cc new file mode 100644 index 0000000000..6f4c2ff332 --- /dev/null +++ b/third_party/aom/test/avg_test.cc @@ -0,0 +1,1150 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +#include <ostream> +#include <string> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom_ports/aom_timer.h" +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +namespace { + +using libaom_test::ACMRandom; + +template <typename Pixel> +class AverageTestBase : public ::testing::Test { + public: + AverageTestBase(int width, int height, int bit_depth = 8) + : width_(width), height_(height), source_data_(nullptr), + source_stride_(0), bit_depth_(bit_depth) {} + + void TearDown() override { + aom_free(source_data_); + source_data_ = nullptr; + } + + protected: + // Handle blocks up to 4 blocks 64x64 with stride up to 128 + static const int kDataAlignment = 16; + static const int kDataBlockWidth = 128; + static const int kDataBlockHeight = 128; + static const int kDataBlockSize = kDataBlockWidth * kDataBlockHeight; + + void SetUp() override { + const testing::TestInfo *const test_info = + testing::UnitTest::GetInstance()->current_test_info(); + // Skip the speed test for C code as the baseline uses the same function. 
+ if (std::string(test_info->test_suite_name()).find("C/") == 0 && + std::string(test_info->name()).find("DISABLED_Speed") != + std::string::npos) { + GTEST_SKIP(); + } + + source_data_ = static_cast<Pixel *>( + aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); + ASSERT_NE(source_data_, nullptr); + memset(source_data_, 0, kDataBlockSize * sizeof(source_data_[0])); + source_stride_ = (width_ + 31) & ~31; + bit_depth_ = 8; + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + // Sum Pixels + static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) { + unsigned int average = 0; + for (int h = 0; h < 8; ++h) { + for (int w = 0; w < 8; ++w) average += source[h * pitch + w]; + } + return (average + 32) >> 6; + } + + static void ReferenceAverage8x8_quad(const uint8_t *source, int pitch, + int x16_idx, int y16_idx, int *avg) { + for (int k = 0; k < 4; k++) { + int average = 0; + int x8_idx = x16_idx + ((k & 1) << 3); + int y8_idx = y16_idx + ((k >> 1) << 3); + for (int h = 0; h < 8; ++h) { + for (int w = 0; w < 8; ++w) + average += source[(h + y8_idx) * pitch + w + x8_idx]; + } + avg[k] = (average + 32) >> 6; + } + } + + static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) { + unsigned int average = 0; + for (int h = 0; h < 4; ++h) { + for (int w = 0; w < 4; ++w) average += source[h * pitch + w]; + } + return (average + 8) >> 4; + } + + void FillConstant(Pixel fill_constant) { + for (int i = 0; i < width_ * height_; ++i) { + source_data_[i] = fill_constant; + } + } + + void FillRandom() { + for (int i = 0; i < width_ * height_; ++i) { + source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1); + } + } + + int width_, height_; + Pixel *source_data_; + int source_stride_; + int bit_depth_; + + ACMRandom rnd_; +}; +typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch); + +// Arguments: width, height, bit_depth, buffer start offset, block size, avg +// function. 
+typedef std::tuple<int, int, int, int, int, AverageFunction> AvgFunc; + +template <typename Pixel> +class AverageTest : public AverageTestBase<Pixel>, + public ::testing::WithParamInterface<AvgFunc> { + public: + AverageTest() + : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {} + + protected: + using AverageTestBase<Pixel>::source_data_; + using AverageTestBase<Pixel>::source_stride_; + using AverageTestBase<Pixel>::ReferenceAverage8x8; + using AverageTestBase<Pixel>::ReferenceAverage4x4; + using AverageTestBase<Pixel>::FillConstant; + using AverageTestBase<Pixel>::FillRandom; + + void CheckAverages() { + const int block_size = GET_PARAM(4); + unsigned int expected = 0; + + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. + const Pixel *const src = source_data_ + GET_PARAM(3); + if (block_size == 8) { + expected = ReferenceAverage8x8(src, source_stride_); + } else if (block_size == 4) { + expected = ReferenceAverage4x4(src, source_stride_); + } + + aom_usec_timer timer; + unsigned int actual; + if (sizeof(Pixel) == 2) { +#if CONFIG_AV1_HIGHBITDEPTH + AverageFunction avg_c = + (block_size == 8) ? aom_highbd_avg_8x8_c : aom_highbd_avg_4x4_c; + // To avoid differences in optimization with the local Reference*() + // functions the C implementation is used as a baseline. + aom_usec_timer_start(&timer); + avg_c(CONVERT_TO_BYTEPTR(src), source_stride_); + aom_usec_timer_mark(&timer); + ref_elapsed_time_ += aom_usec_timer_elapsed(&timer); + + AverageFunction avg_opt = GET_PARAM(5); + API_REGISTER_STATE_CHECK( + aom_usec_timer_start(&timer); + actual = avg_opt(CONVERT_TO_BYTEPTR(src), source_stride_); + aom_usec_timer_mark(&timer)); +#endif // CONFIG_AV1_HIGHBITDEPTH + } else { + ASSERT_EQ(sizeof(Pixel), 1u); + + AverageFunction avg_c = (block_size == 8) ? 
aom_avg_8x8_c : aom_avg_4x4_c; + aom_usec_timer_start(&timer); + avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_); + aom_usec_timer_mark(&timer); + ref_elapsed_time_ += aom_usec_timer_elapsed(&timer); + + AverageFunction avg_opt = GET_PARAM(5); + API_REGISTER_STATE_CHECK( + aom_usec_timer_start(&timer); + actual = + avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_); + aom_usec_timer_mark(&timer)); + } + opt_elapsed_time_ += aom_usec_timer_elapsed(&timer); + + EXPECT_EQ(expected, actual); + } + + void TestConstantValue(Pixel value) { + FillConstant(value); + CheckAverages(); + } + + void TestRandom(int iterations = 1000) { + for (int i = 0; i < iterations; i++) { + FillRandom(); + CheckAverages(); + } + } + + void PrintTimingStats() const { + printf( + "block_size = %d \t ref_time = %d \t simd_time = %d \t Gain = %4.2f\n", + GET_PARAM(4), static_cast<int>(ref_elapsed_time_), + static_cast<int>(opt_elapsed_time_), + (static_cast<float>(ref_elapsed_time_) / + static_cast<float>(opt_elapsed_time_))); + } + + int64_t ref_elapsed_time_ = 0; + int64_t opt_elapsed_time_ = 0; +}; + +typedef void (*AverageFunction_8x8_quad)(const uint8_t *s, int pitch, int x_idx, + int y_idx, int *avg); + +// Arguments: width, height, bit_depth, buffer start offset, block size, avg +// function. 
+typedef std::tuple<int, int, int, int, int, AverageFunction_8x8_quad> + AvgFunc_8x8_quad; + +template <typename Pixel> +class AverageTest_8x8_quad + : public AverageTestBase<Pixel>, + public ::testing::WithParamInterface<AvgFunc_8x8_quad> { + public: + AverageTest_8x8_quad() + : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {} + + protected: + using AverageTestBase<Pixel>::source_data_; + using AverageTestBase<Pixel>::source_stride_; + using AverageTestBase<Pixel>::ReferenceAverage8x8_quad; + using AverageTestBase<Pixel>::FillConstant; + using AverageTestBase<Pixel>::FillRandom; + + void CheckAveragesAt(int iterations, int x16_idx, int y16_idx) { + ASSERT_EQ(sizeof(Pixel), 1u); + const int block_size = GET_PARAM(4); + (void)block_size; + int expected[4] = { 0 }; + + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. + const Pixel *const src = source_data_ + GET_PARAM(3); + ReferenceAverage8x8_quad(src, source_stride_, x16_idx, y16_idx, expected); + + aom_usec_timer timer; + int expected_c[4] = { 0 }; + int actual[4] = { 0 }; + AverageFunction_8x8_quad avg_c = aom_avg_8x8_quad_c; + aom_usec_timer_start(&timer); + for (int i = 0; i < iterations; i++) { + avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx, + y16_idx, expected_c); + } + aom_usec_timer_mark(&timer); + ref_elapsed_time_ += aom_usec_timer_elapsed(&timer); + + AverageFunction_8x8_quad avg_opt = GET_PARAM(5); + aom_usec_timer_start(&timer); + for (int i = 0; i < iterations; i++) { + avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx, + y16_idx, actual); + } + aom_usec_timer_mark(&timer); + opt_elapsed_time_ += aom_usec_timer_elapsed(&timer); + + for (int k = 0; k < 4; k++) { + EXPECT_EQ(expected[k], actual[k]); + EXPECT_EQ(expected_c[k], actual[k]); + } + + // Print scaling information only when Speed test is called. 
+ if (iterations > 1) { + printf("ref_time = %d \t simd_time = %d \t Gain = %4.2f\n", + static_cast<int>(ref_elapsed_time_), + static_cast<int>(opt_elapsed_time_), + (static_cast<float>(ref_elapsed_time_) / + static_cast<float>(opt_elapsed_time_))); + } + } + + void CheckAverages() { + for (int x16_idx = 0; x16_idx < this->kDataBlockWidth / 8; x16_idx += 2) + for (int y16_idx = 0; y16_idx < this->kDataBlockHeight / 8; y16_idx += 2) + CheckAveragesAt(1, x16_idx, y16_idx); + } + + void TestConstantValue(Pixel value) { + FillConstant(value); + CheckAverages(); + } + + void TestRandom() { + FillRandom(); + CheckAverages(); + } + + void TestSpeed() { + FillRandom(); + CheckAveragesAt(1000000, 0, 0); + } + + int64_t ref_elapsed_time_ = 0; + int64_t opt_elapsed_time_ = 0; +}; + +using AverageTest8bpp = AverageTest<uint8_t>; + +TEST_P(AverageTest8bpp, MinValue) { TestConstantValue(0); } + +TEST_P(AverageTest8bpp, MaxValue) { TestConstantValue(255); } + +TEST_P(AverageTest8bpp, Random) { TestRandom(); } + +TEST_P(AverageTest8bpp, DISABLED_Speed) { + TestRandom(1000000); + PrintTimingStats(); +} + +using AvgTest8bpp_avg_8x8_quad = AverageTest_8x8_quad<uint8_t>; + +TEST_P(AvgTest8bpp_avg_8x8_quad, MinValue) { TestConstantValue(0); } + +TEST_P(AvgTest8bpp_avg_8x8_quad, MaxValue) { TestConstantValue(255); } + +TEST_P(AvgTest8bpp_avg_8x8_quad, Random) { TestRandom(); } + +TEST_P(AvgTest8bpp_avg_8x8_quad, DISABLED_Speed) { TestSpeed(); } + +#if CONFIG_AV1_HIGHBITDEPTH +using AverageTestHbd = AverageTest<uint16_t>; + +TEST_P(AverageTestHbd, MinValue) { TestConstantValue(0); } + +TEST_P(AverageTestHbd, MaxValue10bit) { TestConstantValue(1023); } +TEST_P(AverageTestHbd, MaxValue12bit) { TestConstantValue(4095); } + +TEST_P(AverageTestHbd, Random) { TestRandom(); } + +TEST_P(AverageTestHbd, DISABLED_Speed) { + TestRandom(1000000); + PrintTimingStats(); +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +typedef void (*IntProRowFunc)(int16_t *hbuf, uint8_t const *ref, + const int ref_stride, 
const int width, + const int height, int norm_factor); + +// Params: width, height, asm function, c function. +typedef std::tuple<int, int, IntProRowFunc, IntProRowFunc> IntProRowParam; + +class IntProRowTest : public AverageTestBase<uint8_t>, + public ::testing::WithParamInterface<IntProRowParam> { + public: + IntProRowTest() + : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), hbuf_asm_(nullptr), + hbuf_c_(nullptr) { + asm_func_ = GET_PARAM(2); + c_func_ = GET_PARAM(3); + } + + void set_norm_factor() { + if (height_ == 128) + norm_factor_ = 6; + else if (height_ == 64) + norm_factor_ = 5; + else if (height_ == 32) + norm_factor_ = 4; + else if (height_ == 16) + norm_factor_ = 3; + } + + protected: + void SetUp() override { + source_data_ = static_cast<uint8_t *>( + aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); + ASSERT_NE(source_data_, nullptr); + + hbuf_asm_ = static_cast<int16_t *>( + aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * width_)); + ASSERT_NE(hbuf_asm_, nullptr); + hbuf_c_ = static_cast<int16_t *>( + aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * width_)); + ASSERT_NE(hbuf_c_, nullptr); + } + + void TearDown() override { + aom_free(source_data_); + source_data_ = nullptr; + aom_free(hbuf_c_); + hbuf_c_ = nullptr; + aom_free(hbuf_asm_); + hbuf_asm_ = nullptr; + } + + void RunComparison() { + set_norm_factor(); + API_REGISTER_STATE_CHECK( + c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_)); + API_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, width_, width_, + height_, norm_factor_)); + EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_)) + << "Output mismatch\n"; + } + + void RunSpeedTest() { + const int numIter = 5000000; + set_norm_factor(); + printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_, + numIter); + aom_usec_timer c_timer_; + aom_usec_timer_start(&c_timer_); + for (int i = 0; i < numIter; i++) { + c_func_(hbuf_c_, source_data_, width_, width_, 
height_, norm_factor_); + } + aom_usec_timer_mark(&c_timer_); + + aom_usec_timer asm_timer_; + aom_usec_timer_start(&asm_timer_); + + for (int i = 0; i < numIter; i++) { + asm_func_(hbuf_asm_, source_data_, width_, width_, height_, norm_factor_); + } + aom_usec_timer_mark(&asm_timer_); + + const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_)); + const int asm_sum_time = + static_cast<int>(aom_usec_timer_elapsed(&asm_timer_)); + + printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time, + asm_sum_time, + (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time))); + + EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_)) + << "Output mismatch\n"; + } + + private: + IntProRowFunc asm_func_; + IntProRowFunc c_func_; + int16_t *hbuf_asm_; + int16_t *hbuf_c_; + int norm_factor_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProRowTest); + +typedef void (*IntProColFunc)(int16_t *vbuf, uint8_t const *ref, + const int ref_stride, const int width, + const int height, int norm_factor); + +// Params: width, height, asm function, c function. 
+typedef std::tuple<int, int, IntProColFunc, IntProColFunc> IntProColParam; + +class IntProColTest : public AverageTestBase<uint8_t>, + public ::testing::WithParamInterface<IntProColParam> { + public: + IntProColTest() + : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), vbuf_asm_(nullptr), + vbuf_c_(nullptr) { + asm_func_ = GET_PARAM(2); + c_func_ = GET_PARAM(3); + } + + protected: + void SetUp() override { + source_data_ = static_cast<uint8_t *>( + aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); + ASSERT_NE(source_data_, nullptr); + + vbuf_asm_ = static_cast<int16_t *>( + aom_memalign(kDataAlignment, sizeof(*vbuf_asm_) * width_)); + ASSERT_NE(vbuf_asm_, nullptr); + vbuf_c_ = static_cast<int16_t *>( + aom_memalign(kDataAlignment, sizeof(*vbuf_c_) * width_)); + ASSERT_NE(vbuf_c_, nullptr); + } + + void TearDown() override { + aom_free(source_data_); + source_data_ = nullptr; + aom_free(vbuf_c_); + vbuf_c_ = nullptr; + aom_free(vbuf_asm_); + vbuf_asm_ = nullptr; + } + + void RunComparison() { + int norm_factor_ = 3 + (width_ >> 5); + API_REGISTER_STATE_CHECK( + c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_)); + API_REGISTER_STATE_CHECK(asm_func_(vbuf_asm_, source_data_, width_, width_, + height_, norm_factor_)); + EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_)) + << "Output mismatch\n"; + } + void RunSpeedTest() { + const int numIter = 5000000; + printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_, + numIter); + int norm_factor_ = 3 + (width_ >> 5); + aom_usec_timer c_timer_; + aom_usec_timer_start(&c_timer_); + for (int i = 0; i < numIter; i++) { + c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_); + } + aom_usec_timer_mark(&c_timer_); + + aom_usec_timer asm_timer_; + aom_usec_timer_start(&asm_timer_); + + for (int i = 0; i < numIter; i++) { + asm_func_(vbuf_asm_, source_data_, width_, width_, height_, norm_factor_); + } + aom_usec_timer_mark(&asm_timer_); + + 
const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_)); + const int asm_sum_time = + static_cast<int>(aom_usec_timer_elapsed(&asm_timer_)); + + printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time, + asm_sum_time, + (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time))); + + EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_)) + << "Output mismatch\n"; + } + + private: + IntProColFunc asm_func_; + IntProColFunc c_func_; + int16_t *vbuf_asm_; + int16_t *vbuf_c_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProColTest); + +TEST_P(IntProRowTest, MinValue) { + FillConstant(0); + RunComparison(); +} + +TEST_P(IntProRowTest, MaxValue) { + FillConstant(255); + RunComparison(); +} + +TEST_P(IntProRowTest, Random) { + FillRandom(); + RunComparison(); +} + +TEST_P(IntProRowTest, DISABLED_Speed) { + FillRandom(); + RunSpeedTest(); +} + +TEST_P(IntProColTest, MinValue) { + FillConstant(0); + RunComparison(); +} + +TEST_P(IntProColTest, MaxValue) { + FillConstant(255); + RunComparison(); +} + +TEST_P(IntProColTest, Random) { + FillRandom(); + RunComparison(); +} + +TEST_P(IntProColTest, DISABLED_Speed) { + FillRandom(); + RunSpeedTest(); +} +class VectorVarTestBase : public ::testing::Test { + public: + explicit VectorVarTestBase(int bwl) { m_bwl = bwl; } + VectorVarTestBase() = default; + ~VectorVarTestBase() override = default; + + protected: + static const int kDataAlignment = 16; + + void SetUp() override { + width = 4 << m_bwl; + + ref_vector = static_cast<int16_t *>( + aom_memalign(kDataAlignment, width * sizeof(ref_vector[0]))); + ASSERT_NE(ref_vector, nullptr); + src_vector = static_cast<int16_t *>( + aom_memalign(kDataAlignment, width * sizeof(src_vector[0]))); + ASSERT_NE(src_vector, nullptr); + + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + void TearDown() override { + aom_free(ref_vector); + ref_vector = nullptr; + aom_free(src_vector); + src_vector = nullptr; + } + + void 
FillConstant(int16_t fill_constant_ref, int16_t fill_constant_src) { + for (int i = 0; i < width; ++i) { + ref_vector[i] = fill_constant_ref; + src_vector[i] = fill_constant_src; + } + } + + void FillRandom() { + for (int i = 0; i < width; ++i) { + ref_vector[i] = + rnd_.Rand16() % max_range; // acc. aom_vector_var_c brief. + src_vector[i] = rnd_.Rand16() % max_range; + } + } + + int width; + int m_bwl; + int16_t *ref_vector; + int16_t *src_vector; + ACMRandom rnd_; + + static const int max_range = 510; + static const int num_random_cmp = 50; +}; + +typedef int (*VectorVarFunc)(const int16_t *ref, const int16_t *src, + const int bwl); + +typedef std::tuple<int, VectorVarFunc, VectorVarFunc> VecVarFunc; + +class VectorVarTest : public VectorVarTestBase, + public ::testing::WithParamInterface<VecVarFunc> { + public: + VectorVarTest() + : VectorVarTestBase(GET_PARAM(0)), c_func(GET_PARAM(1)), + simd_func(GET_PARAM(2)) {} + + protected: + int calcVarC() { return c_func(ref_vector, src_vector, m_bwl); } + int calcVarSIMD() { return simd_func(ref_vector, src_vector, m_bwl); } + + VectorVarFunc c_func; + VectorVarFunc simd_func; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VectorVarTest); + +TEST_P(VectorVarTest, MaxVar) { + FillConstant(0, max_range); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); +} +TEST_P(VectorVarTest, MaxVarRev) { + FillConstant(max_range, 0); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); +} +TEST_P(VectorVarTest, ZeroDiff) { + FillConstant(0, 0); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); +} +TEST_P(VectorVarTest, ZeroDiff2) { + FillConstant(max_range, max_range); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); +} +TEST_P(VectorVarTest, Constant) { + FillConstant(30, 90); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); +} 
+TEST_P(VectorVarTest, Random) { + for (size_t i = 0; i < num_random_cmp; i++) { + FillRandom(); + int c_var = calcVarC(); + int simd_var = calcVarSIMD(); + ASSERT_EQ(c_var, simd_var); + } +} +TEST_P(VectorVarTest, DISABLED_Speed) { + FillRandom(); + const int numIter = 5000000; + printf("Width = %d number of iteration is %d \n", width, numIter); + + int sum_c_var = 0; + int c_var = 0; + + aom_usec_timer c_timer_; + aom_usec_timer_start(&c_timer_); + for (size_t i = 0; i < numIter; i++) { + c_var = calcVarC(); + sum_c_var += c_var; + } + aom_usec_timer_mark(&c_timer_); + + int simd_var = 0; + int sum_simd_var = 0; + aom_usec_timer simd_timer_; + aom_usec_timer_start(&simd_timer_); + for (size_t i = 0; i < numIter; i++) { + simd_var = calcVarSIMD(); + sum_simd_var += simd_var; + } + aom_usec_timer_mark(&simd_timer_); + + const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_)); + const int simd_sum_time = + static_cast<int>(aom_usec_timer_elapsed(&simd_timer_)); + + printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time, + simd_sum_time, + (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time))); + + EXPECT_EQ(c_var, simd_var) << "Output mismatch \n"; + EXPECT_EQ(sum_c_var, sum_simd_var) << "Output mismatch \n"; +} + +using std::make_tuple; + +INSTANTIATE_TEST_SUITE_P( + C, AverageTest8bpp, + ::testing::Values(make_tuple(16, 16, 8, 1, 8, &aom_avg_8x8_c), + make_tuple(16, 16, 8, 1, 4, &aom_avg_4x4_c))); + +INSTANTIATE_TEST_SUITE_P( + C, AvgTest8bpp_avg_8x8_quad, + ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_c), + make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_c), + make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_c))); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, AverageTest8bpp, + ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_sse2), + make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_sse2), + make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_sse2), + make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_sse2), 
+ make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_sse2), + make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_sse2))); + +INSTANTIATE_TEST_SUITE_P( + SSE2, AvgTest8bpp_avg_8x8_quad, + ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_sse2), + make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_sse2), + make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_sse2))); + +INSTANTIATE_TEST_SUITE_P( + SSE2, IntProRowTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_row_sse2, &aom_int_pro_row_c), + make_tuple(32, 32, &aom_int_pro_row_sse2, &aom_int_pro_row_c), + make_tuple(64, 64, &aom_int_pro_row_sse2, &aom_int_pro_row_c), + make_tuple(128, 128, &aom_int_pro_row_sse2, &aom_int_pro_row_c))); + +INSTANTIATE_TEST_SUITE_P( + SSE2, IntProColTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_col_sse2, &aom_int_pro_col_c), + make_tuple(32, 32, &aom_int_pro_col_sse2, &aom_int_pro_col_c), + make_tuple(64, 64, &aom_int_pro_col_sse2, &aom_int_pro_col_c), + make_tuple(128, 128, &aom_int_pro_col_sse2, &aom_int_pro_col_c))); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AvgTest8bpp_avg_8x8_quad, + ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_avx2), + make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_avx2), + make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_avx2))); + +INSTANTIATE_TEST_SUITE_P( + AVX2, IntProRowTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_row_avx2, &aom_int_pro_row_c), + make_tuple(32, 32, &aom_int_pro_row_avx2, &aom_int_pro_row_c), + make_tuple(64, 64, &aom_int_pro_row_avx2, &aom_int_pro_row_c), + make_tuple(128, 128, &aom_int_pro_row_avx2, &aom_int_pro_row_c))); + +INSTANTIATE_TEST_SUITE_P( + AVX2, IntProColTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_col_avx2, &aom_int_pro_col_c), + make_tuple(32, 32, &aom_int_pro_col_avx2, &aom_int_pro_col_c), + make_tuple(64, 64, &aom_int_pro_col_avx2, &aom_int_pro_col_c), + make_tuple(128, 128, &aom_int_pro_col_avx2, &aom_int_pro_col_c))); +#endif + +#if 
HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AverageTest8bpp, + ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_neon), + make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_neon), + make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_neon), + make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_neon), + make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_neon), + make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_neon))); +INSTANTIATE_TEST_SUITE_P( + NEON, IntProRowTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_row_neon, &aom_int_pro_row_c), + make_tuple(32, 32, &aom_int_pro_row_neon, &aom_int_pro_row_c), + make_tuple(64, 64, &aom_int_pro_row_neon, &aom_int_pro_row_c), + make_tuple(128, 128, &aom_int_pro_row_neon, &aom_int_pro_row_c))); + +INSTANTIATE_TEST_SUITE_P( + NEON, IntProColTest, + ::testing::Values( + make_tuple(16, 16, &aom_int_pro_col_neon, &aom_int_pro_col_c), + make_tuple(32, 32, &aom_int_pro_col_neon, &aom_int_pro_col_c), + make_tuple(64, 64, &aom_int_pro_col_neon, &aom_int_pro_col_c), + make_tuple(128, 128, &aom_int_pro_col_neon, &aom_int_pro_col_c))); + +INSTANTIATE_TEST_SUITE_P( + NEON, AvgTest8bpp_avg_8x8_quad, + ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_neon), + make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_neon), + make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_neon))); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +INSTANTIATE_TEST_SUITE_P( + C, AverageTestHbd, + ::testing::Values(make_tuple(16, 16, 10, 1, 8, &aom_highbd_avg_8x8_c), + make_tuple(16, 16, 10, 1, 4, &aom_highbd_avg_4x4_c), + make_tuple(16, 16, 12, 1, 8, &aom_highbd_avg_8x8_c), + make_tuple(16, 16, 12, 1, 4, &aom_highbd_avg_4x4_c))); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AverageTestHbd, + ::testing::Values(make_tuple(16, 16, 10, 0, 4, &aom_highbd_avg_4x4_neon), + make_tuple(16, 16, 10, 5, 4, &aom_highbd_avg_4x4_neon), + make_tuple(32, 32, 10, 15, 4, &aom_highbd_avg_4x4_neon), + make_tuple(16, 16, 12, 0, 4, &aom_highbd_avg_4x4_neon), + make_tuple(16, 16, 12, 5, 4, 
&aom_highbd_avg_4x4_neon), + make_tuple(32, 32, 12, 15, 4, &aom_highbd_avg_4x4_neon), + make_tuple(16, 16, 10, 0, 8, &aom_highbd_avg_8x8_neon), + make_tuple(16, 16, 10, 5, 8, &aom_highbd_avg_8x8_neon), + make_tuple(32, 32, 10, 15, 8, &aom_highbd_avg_8x8_neon), + make_tuple(16, 16, 12, 0, 8, &aom_highbd_avg_8x8_neon), + make_tuple(16, 16, 12, 5, 8, &aom_highbd_avg_8x8_neon), + make_tuple(32, 32, 12, 15, 8, &aom_highbd_avg_8x8_neon))); +#endif // HAVE_NEON +#endif // CONFIG_AV1_HIGHBITDEPTH + +typedef int (*SatdFunc)(const tran_low_t *coeffs, int length); +typedef int (*SatdLpFunc)(const int16_t *coeffs, int length); + +template <typename SatdFuncType> +struct SatdTestParam { + SatdTestParam(int s, SatdFuncType f1, SatdFuncType f2) + : satd_size(s), func_ref(f1), func_simd(f2) {} + // Writes "satd_size: <n>" to |os|. + friend std::ostream &operator<<(std::ostream &os, + const SatdTestParam<SatdFuncType> &param) { + return os << "satd_size: " << param.satd_size; + } + int satd_size; + SatdFuncType func_ref; + SatdFuncType func_simd; +}; + +template <typename CoeffType, typename SatdFuncType> +class SatdTestBase + : public ::testing::Test, + public ::testing::WithParamInterface<SatdTestParam<SatdFuncType>> { + protected: + explicit SatdTestBase(const SatdTestParam<SatdFuncType> &func_param) { + satd_size_ = func_param.satd_size; + satd_func_ref_ = func_param.func_ref; + satd_func_simd_ = func_param.func_simd; + } + void SetUp() override { + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<CoeffType *>( + aom_memalign(32, sizeof(*src_) * satd_size_)); + ASSERT_NE(src_, nullptr); + } + void TearDown() override { aom_free(src_); } + void FillConstant(const CoeffType val) { + for (int i = 0; i < satd_size_; ++i) src_[i] = val; + } + void FillRandom() { + for (int i = 0; i < satd_size_; ++i) { + src_[i] = static_cast<int16_t>(rnd_.Rand16()); + } + } + void Check(int expected) { + int total_ref; + API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_)); + EXPECT_EQ(expected, 
total_ref); + + int total_simd; + API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_)); + EXPECT_EQ(expected, total_simd); + } + void RunComparison() { + int total_ref; + API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_)); + + int total_simd; + API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_)); + + EXPECT_EQ(total_ref, total_simd); + } + void RunSpeedTest() { + const int numIter = 500000; + printf("size = %d number of iteration is %d \n", satd_size_, numIter); + + int total_ref; + aom_usec_timer c_timer_; + aom_usec_timer_start(&c_timer_); + for (int i = 0; i < numIter; i++) { + total_ref = satd_func_ref_(src_, satd_size_); + } + aom_usec_timer_mark(&c_timer_); + + int total_simd; + aom_usec_timer simd_timer_; + aom_usec_timer_start(&simd_timer_); + + for (int i = 0; i < numIter; i++) { + total_simd = satd_func_simd_(src_, satd_size_); + } + aom_usec_timer_mark(&simd_timer_); + + const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_)); + const int simd_sum_time = + static_cast<int>(aom_usec_timer_elapsed(&simd_timer_)); + + printf( + "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time, + simd_sum_time, + (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time))); + + EXPECT_EQ(total_ref, total_simd) << "Output mismatch \n"; + } + int satd_size_; + + private: + CoeffType *src_; + SatdFuncType satd_func_ref_; + SatdFuncType satd_func_simd_; + ACMRandom rnd_; +}; + +class SatdTest : public SatdTestBase<tran_low_t, SatdFunc> { + public: + SatdTest() : SatdTestBase(GetParam()) {} +}; + +TEST_P(SatdTest, MinValue) { + const int kMin = -524287; + const int expected = -kMin * satd_size_; + FillConstant(kMin); + Check(expected); +} +TEST_P(SatdTest, MaxValue) { + const int kMax = 524287; + const int expected = kMax * satd_size_; + FillConstant(kMax); + Check(expected); +} +TEST_P(SatdTest, Random) { + int expected; + switch (satd_size_) { + case 16: expected = 205298; 
break; + case 64: expected = 1113950; break; + case 256: expected = 4268415; break; + case 1024: expected = 16954082; break; + default: + FAIL() << "Invalid satd size (" << satd_size_ + << ") valid: 16/64/256/1024"; + } + FillRandom(); + Check(expected); +} +TEST_P(SatdTest, Match) { + FillRandom(); + RunComparison(); +} +TEST_P(SatdTest, DISABLED_Speed) { + FillRandom(); + RunSpeedTest(); +} +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdTest); + +INSTANTIATE_TEST_SUITE_P( + C, SatdTest, + ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_c), + SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_c), + SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_c), + SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_c))); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, SatdTest, + ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_neon), + SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_neon), + SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_neon), + SatdTestParam<SatdFunc>(1024, &aom_satd_c, + &aom_satd_neon))); +INSTANTIATE_TEST_SUITE_P( + NEON, VectorVarTest, + ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_neon), + make_tuple(3, &aom_vector_var_c, &aom_vector_var_neon), + make_tuple(4, &aom_vector_var_c, &aom_vector_var_neon), + make_tuple(5, &aom_vector_var_c, &aom_vector_var_neon))); +#endif + +#if HAVE_SVE +INSTANTIATE_TEST_SUITE_P( + SVE, VectorVarTest, + ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_sve), + make_tuple(3, &aom_vector_var_c, &aom_vector_var_sve), + make_tuple(4, &aom_vector_var_c, &aom_vector_var_sve), + make_tuple(5, &aom_vector_var_c, &aom_vector_var_sve))); +#endif // HAVE_SVE + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, VectorVarTest, + ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_sse4_1), + make_tuple(3, &aom_vector_var_c, &aom_vector_var_sse4_1), + make_tuple(4, &aom_vector_var_c, &aom_vector_var_sse4_1), + 
make_tuple(5, &aom_vector_var_c, + &aom_vector_var_sse4_1))); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, SatdTest, + ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_avx2), + SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_avx2), + SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_avx2), + SatdTestParam<SatdFunc>(1024, &aom_satd_c, + &aom_satd_avx2))); + +INSTANTIATE_TEST_SUITE_P( + AVX2, VectorVarTest, + ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_avx2), + make_tuple(3, &aom_vector_var_c, &aom_vector_var_avx2), + make_tuple(4, &aom_vector_var_c, &aom_vector_var_avx2), + make_tuple(5, &aom_vector_var_c, &aom_vector_var_avx2))); +#endif // HAVE_AVX2 + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, SatdTest, + ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_sse2), + SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_sse2), + SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_sse2), + SatdTestParam<SatdFunc>(1024, &aom_satd_c, + &aom_satd_sse2))); +#endif + +class SatdLpTest : public SatdTestBase<int16_t, SatdLpFunc> { + public: + SatdLpTest() : SatdTestBase(GetParam()) {} +}; + +TEST_P(SatdLpTest, MinValue) { + const int kMin = -32640; + const int expected = -kMin * satd_size_; + FillConstant(kMin); + Check(expected); +} +TEST_P(SatdLpTest, MaxValue) { + const int kMax = 32640; + const int expected = kMax * satd_size_; + FillConstant(kMax); + Check(expected); +} +TEST_P(SatdLpTest, Random) { + int expected; + switch (satd_size_) { + case 16: expected = 205298; break; + case 64: expected = 1113950; break; + case 256: expected = 4268415; break; + case 1024: expected = 16954082; break; + default: + FAIL() << "Invalid satd size (" << satd_size_ + << ") valid: 16/64/256/1024"; + } + FillRandom(); + Check(expected); +} +TEST_P(SatdLpTest, Match) { + FillRandom(); + RunComparison(); +} +TEST_P(SatdLpTest, DISABLED_Speed) { + FillRandom(); + RunSpeedTest(); +} 
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdLpTest); + +// Add the following c test to avoid gtest uninstantiated warning. +INSTANTIATE_TEST_SUITE_P( + C, SatdLpTest, + ::testing::Values( + SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_c), + SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_c), + SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_c), + SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_c))); + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, SatdLpTest, + ::testing::Values( + SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_neon), + SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_neon), + SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_neon), + SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_neon))); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, SatdLpTest, + ::testing::Values( + SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_avx2), + SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_avx2), + SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_avx2), + SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_avx2))); +#endif + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, SatdLpTest, + ::testing::Values( + SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_sse2), + SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_sse2), + SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_sse2), + SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_sse2))); +#endif + +} // namespace diff --git a/third_party/aom/test/avif_progressive_test.cc b/third_party/aom/test/avif_progressive_test.cc new file mode 100644 index 0000000000..2a28ca368b --- /dev/null +++ b/third_party/aom/test/avif_progressive_test.cc @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2023, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <cstddef> +#include <vector> + +#include "aom/aomcx.h" +#include "aom/aom_codec.h" +#include "aom/aom_encoder.h" +#include "aom/aom_image.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +// This test emulates how libavif calls libaom functions to encode a +// progressive AVIF image in libavif's ProgressiveTest.QualityChange test. +TEST(AVIFProgressiveTest, QualityChange) { + constexpr int kWidth = 256; + constexpr int kHeight = 256; + // Dummy buffer of neutral gray samples. 
+ constexpr size_t kBufferSize = 3 * kWidth * kHeight; + std::vector<unsigned char> buffer(kBufferSize, + static_cast<unsigned char>(128)); + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I444, kWidth, kHeight, 1, + buffer.data())); + img.cp = AOM_CICP_CP_UNSPECIFIED; + img.tc = AOM_CICP_TC_UNSPECIFIED; + img.mc = AOM_CICP_MC_UNSPECIFIED; + img.range = AOM_CR_FULL_RANGE; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY)); + cfg.g_profile = 1; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + cfg.g_bit_depth = AOM_BITS_8; + cfg.g_input_bit_depth = 8; + cfg.g_lag_in_frames = 0; + cfg.rc_end_usage = AOM_Q; + cfg.rc_min_quantizer = 50; + cfg.rc_max_quantizer = 50; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 50)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_NUMBER_SPATIAL_LAYERS, 2)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 6)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM)); + + // First frame (layer 0) + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + aom_codec_iter_t iter = nullptr; + const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0x1f0011. 
+ EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Second frame (layer 1) + cfg.rc_min_quantizer = 0; + cfg.rc_max_quantizer = 0; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AV1E_SET_LOSSLESS, 1)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 1)); + aom_enc_frame_flags_t encode_flags = + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD | + AOM_EFLAG_NO_REF_ARF2 | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, encode_flags)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0. + EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Flush encoder + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +// This test emulates how libavif calls libaom functions to encode a +// progressive AVIF image in libavif's ProgressiveTest.DimensionChange test. +TEST(AVIFProgressiveTest, DimensionChange) { + constexpr int kWidth = 256; + constexpr int kHeight = 256; + // Dummy buffer of neutral gray samples. 
+ constexpr size_t kBufferSize = 3 * kWidth * kHeight; + std::vector<unsigned char> buffer(kBufferSize, + static_cast<unsigned char>(128)); + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I444, kWidth, kHeight, 1, + buffer.data())); + img.cp = AOM_CICP_CP_UNSPECIFIED; + img.tc = AOM_CICP_TC_UNSPECIFIED; + img.mc = AOM_CICP_MC_UNSPECIFIED; + img.range = AOM_CR_FULL_RANGE; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY)); + cfg.g_profile = 1; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + cfg.g_bit_depth = AOM_BITS_8; + cfg.g_input_bit_depth = 8; + cfg.g_lag_in_frames = 0; + cfg.rc_end_usage = AOM_Q; + cfg.rc_min_quantizer = 0; + cfg.rc_max_quantizer = 0; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AV1E_SET_LOSSLESS, 1)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_NUMBER_SPATIAL_LAYERS, 2)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 6)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM)); + + // First frame (layer 0) + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 0)); + aom_scaling_mode_t scaling_mode = { AOME_ONETWO, AOME_ONETWO }; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_SCALEMODE, &scaling_mode)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + aom_codec_iter_t iter = nullptr; + const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0x1f0011. 
+ EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Second frame (layer 1) + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 1)); + aom_enc_frame_flags_t encode_flags = + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD | + AOM_EFLAG_NO_REF_ARF2 | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, encode_flags)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0. + EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Flush encoder + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +// This test reproduces bug aomedia:3382. Certain parameters such as width, +// height, g_threads, usage, etc. were carefully chosen based on the +// complicated logic of av1_select_sb_size() to cause an inconsistent sb_size. +TEST(AVIFProgressiveTest, DimensionChangeLargeImageMultiThread) { + constexpr int kWidth = 1920; + constexpr int kHeight = 1080; + // Dummy buffer of neutral gray samples. 
+ constexpr size_t kBufferSize = 2 * kWidth * kHeight; + std::vector<unsigned char> buffer(kBufferSize, + static_cast<unsigned char>(128)); + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1, + buffer.data())); + img.cp = AOM_CICP_CP_UNSPECIFIED; + img.tc = AOM_CICP_TC_UNSPECIFIED; + img.mc = AOM_CICP_MC_UNSPECIFIED; + img.range = AOM_CR_FULL_RANGE; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY)); + cfg.g_profile = 0; + cfg.g_w = img.w; + cfg.g_h = img.h; + cfg.g_bit_depth = AOM_BITS_8; + cfg.g_input_bit_depth = 8; + cfg.g_lag_in_frames = 0; + cfg.g_threads = 2; // MultiThread + cfg.rc_end_usage = AOM_Q; + cfg.rc_min_quantizer = 0; + cfg.rc_max_quantizer = 63; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 31)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_NUMBER_SPATIAL_LAYERS, 2)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 6)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_ROW_MT, 1)); // MultiThread + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM)); + + // First frame (layer 0) + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 0)); + aom_scaling_mode_t scaling_mode = { AOME_ONETWO, AOME_ONETWO }; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_SCALEMODE, &scaling_mode)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + aom_codec_iter_t iter = nullptr; + const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0x1f0011. 
+ EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Second frame (layer 1) + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 1)); + aom_enc_frame_flags_t encode_flags = + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD | + AOM_EFLAG_NO_REF_ARF2 | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, encode_flags)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0. + EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Flush encoder + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +} // namespace diff --git a/third_party/aom/test/best_encode.sh b/third_party/aom/test/best_encode.sh new file mode 100755 index 0000000000..d29fdaed52 --- /dev/null +++ b/third_party/aom/test/best_encode.sh @@ -0,0 +1,101 @@ +#!/bin/bash +# +# Copyright (c) 2016, Alliance for Open Media. All rights reserved +# +# This source code is subject to the terms of the BSD 2 Clause License and +# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +# was not distributed with this source code in the LICENSE file, you can +# obtain it at www.aomedia.org/license/software. If the Alliance for Open +# Media Patent License 1.0 was not distributed with this source code in the +# PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+# +# Author: jimbankoski@google.com (Jim Bankoski) + +if [[ $# -ne 2 ]]; then + echo "Encodes a file using best known settings (slow!)" + echo " Usage: be [FILE] [BITRATE]" + echo " Example: be akiyo_cif.y4m 200" + exit 1 # wrong argument count: report failure to the caller +fi + +f=$1 # file is first parameter +b=$2 # bitrate is second parameter + +if [[ -e $f.fpf ]]; then + # First-pass file found, do second pass only + aomenc \ + "$f" \ + -o "$f-$b.av1.webm" \ + -p 2 \ + --pass=2 \ + --fpf="$f.fpf" \ + --good \ + --cpu-used=0 \ + --target-bitrate="$b" \ + --auto-alt-ref=1 \ + -v \ + --minsection-pct=0 \ + --maxsection-pct=800 \ + --lag-in-frames=25 \ + --kf-min-dist=0 \ + --kf-max-dist=99999 \ + --static-thresh=0 \ + --min-q=0 \ + --max-q=63 \ + --drop-frame=0 \ + --bias-pct=50 \ + --minsection-pct=0 \ + --maxsection-pct=800 \ + --psnr \ + --arnr-maxframes=7 \ + --arnr-strength=3 +else + # No first-pass file found, do 2-pass encode + aomenc \ + "$f" \ + -o "$f-$b.av1.webm" \ + -p 2 \ + --pass=1 \ + --fpf="$f.fpf" \ + --good \ + --cpu-used=0 \ + --target-bitrate="$b" \ + --auto-alt-ref=1 \ + -v \ + --minsection-pct=0 \ + --maxsection-pct=800 \ + --lag-in-frames=25 \ + --kf-min-dist=0 \ + --kf-max-dist=99999 \ + --static-thresh=0 \ + --min-q=0 \ + --max-q=63 \ + --drop-frame=0 + + aomenc \ + "$f" \ + -o "$f-$b.av1.webm" \ + -p 2 \ + --pass=2 \ + --fpf="$f.fpf" \ + --good \ + --cpu-used=0 \ + --target-bitrate="$b" \ + --auto-alt-ref=1 \ + -v \ + --minsection-pct=0 \ + --maxsection-pct=800 \ + --lag-in-frames=25 \ + --kf-min-dist=0 \ + --kf-max-dist=99999 \ + --static-thresh=0 \ + --min-q=0 \ + --max-q=63 \ + --drop-frame=0 \ + --bias-pct=50 \ + --minsection-pct=0 \ + --maxsection-pct=800 \ + --psnr \ + --arnr-maxframes=7 \ + --arnr-strength=3 +fi diff --git a/third_party/aom/test/binary_codes_test.cc b/third_party/aom/test/binary_codes_test.cc new file mode 100644 index 0000000000..2c2dfb45a8 --- /dev/null +++ b/third_party/aom/test/binary_codes_test.cc @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+
+#include "test/acm_random.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/bitreader.h"
+#include "aom_dsp/bitwriter.h"
+#include "aom_dsp/binary_codes_reader.h"
+#include "aom_dsp/binary_codes_writer.h"
+
+#define ACCT_STR __func__
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+// Test for Finite subexponential code with reference
+//
+// Writes kRanges * kSubexpParams * kReferences * kValues values with
+// aom_write_primitive_refsubexpfin(), remembering the parameters of every
+// write, then reads them back in the same order with
+// aom_read_primitive_refsubexpfin() and checks each decoded value.
+TEST(AV1, TestPrimitiveRefsubexpfin) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int kBufferSize = 65536;
+  aom_writer bw;
+  uint8_t bw_buffer[kBufferSize];
+  const uint16_t kRanges = 8;
+  const uint16_t kSubexpParams = 6;
+  const uint16_t kReferences = 8;
+  const uint16_t kValues = 16;
+  // Last index: 0 = range, 1 = subexp parameter k, 2 = reference, 3 = value.
+  uint16_t enc_values[kRanges][kSubexpParams][kReferences][kValues][4];
+  const uint16_t range_vals[kRanges] = { 1, 13, 64, 120, 230, 420, 1100, 8000 };
+  aom_start_encode(&bw, bw_buffer);
+  for (int n = 0; n < kRanges; ++n) {
+    const uint16_t range = range_vals[n];
+    for (int k = 0; k < kSubexpParams; ++k) {
+      for (int r = 0; r < kReferences; ++r) {
+        const uint16_t ref = rnd(range);
+        for (int v = 0; v < kValues; ++v) {
+          const uint16_t value = rnd(range);
+          enc_values[n][k][r][v][0] = range;
+          enc_values[n][k][r][v][1] = k;
+          enc_values[n][k][r][v][2] = ref;
+          enc_values[n][k][r][v][3] = value;
+          aom_write_primitive_refsubexpfin(&bw, range, k, ref, value);
+        }
+      }
+    }
+  }
+  GTEST_ASSERT_GE(aom_stop_encode(&bw), 0);
+  aom_reader br;
+  aom_reader_init(&br, bw_buffer, bw.pos);
+  GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
+  GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
+  for (int n = 0; n < kRanges; ++n) {
+    for (int k = 0; k < kSubexpParams; ++k) {
+      for (int r = 0; r < kReferences; ++r) {
+        for (int v = 0; v < kValues; ++v) {
+          const uint16_t range = enc_values[n][k][r][v][0];
+          // Checked through gtest rather than assert() so that the check is
+          // still performed, and reported, in NDEBUG builds.
+          GTEST_ASSERT_EQ(k, enc_values[n][k][r][v][1]);
+          const uint16_t ref = enc_values[n][k][r][v][2];
+          const uint16_t value =
+              aom_read_primitive_refsubexpfin(&br, range, k, ref, ACCT_STR);
+          GTEST_ASSERT_EQ(value, enc_values[n][k][r][v][3]);
+        }
+      }
+    }
+  }
+}
+// TODO(debargha): Adds tests for other primitives
+}  // namespace
diff --git a/third_party/aom/test/blend_a64_mask_1d_test.cc b/third_party/aom/test/blend_a64_mask_1d_test.cc
new file mode 100644
index 0000000000..f9549bccb2
--- /dev/null
+++ b/third_party/aom/test/blend_a64_mask_1d_test.cc
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/register_state_check.h"
+#include "test/function_equivalence_test.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+
+#include "av1/common/enums.h"
+
+#include "aom_dsp/blend.h"
+
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+// Fixture that runs a reference and a tested 1-D mask blend function on the
+// same inputs and requires identical output. F is the blend function pointer
+// type and T is the pixel type of the buffers.
+template <typename F, typename T>
+class BlendA64Mask1DTest : public FunctionEquivalenceTest<F> {
+ public:
+  static const int kIterations = 10000;
+  static const int kMaxWidth = MAX_SB_SIZE * 5;  // * 5 to cover longer strides
+  static const int kMaxHeight = MAX_SB_SIZE;
+  static const int kBufSize = kMaxWidth * kMaxHeight;
+  static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
+  static const int kMaxMaskSize = kMaxMaskWidth;
+
+  ~BlendA64Mask1DTest() override = default;
+
+  // Calls the reference function into dst_ref_ and the tested function into
+  // dst_tst_ using the offsets, strides and mask currently set on the fixture.
+  virtual void Execute(const T *p_src0, const T *p_src1) = 0;
+
+  // Picks random offsets and strides for one block size, randomly aliases one
+  // of the sources with the tested destination, runs Execute() and compares
+  // the w_ x h_ output region. A mismatch is a fatal failure, so callers
+  // should check HasFatalFailure() before running further block sizes.
+  void Common(int block_size) {
+    w_ = block_size_wide[block_size];
+    h_ = block_size_high[block_size];
+
+    dst_offset_ = this->rng_(33);
+    dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src0_offset_ = this->rng_(33);
+    src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src1_offset_ = this->rng_(33);
+    src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    T *p_src0;
+    T *p_src1;
+
+    switch (this->rng_(3)) {
+      case 0:  // Separate sources
+        p_src0 = src0_;
+        p_src1 = src1_;
+        break;
+      case 1:  // src0 == dst
+        p_src0 = dst_tst_;
+        src0_stride_ = dst_stride_;
+        src0_offset_ = dst_offset_;
+        p_src1 = src1_;
+        break;
+      case 2:  // src1 == dst
+        p_src0 = src0_;
+        p_src1 = dst_tst_;
+        src1_stride_ = dst_stride_;
+        src1_offset_ = dst_offset_;
+        break;
+      default: FAIL();
+    }
+
+    Execute(p_src0, p_src1);
+
+    for (int r = 0; r < h_; ++r) {
+      for (int c = 0; c < w_; ++c) {
+        ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
+                  dst_tst_[dst_offset_ + r * dst_stride_ + c]);
+      }
+    }
+  }
+
+  T dst_ref_[kBufSize];
+  T dst_tst_[kBufSize];
+  uint32_t dst_stride_;
+  uint32_t dst_offset_;
+
+  T src0_[kBufSize];
+  uint32_t src0_stride_;
+  uint32_t src0_offset_;
+
+  T src1_[kBufSize];
+  uint32_t src1_stride_;
+  uint32_t src1_offset_;
+
+  uint8_t mask_[kMaxMaskSize];
+
+  int w_;
+  int h_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                    uint32_t src0_stride, const uint8_t *src1,
+                    uint32_t src1_stride, const uint8_t *mask, int w, int h);
+typedef libaom_test::FuncParam<F8B> TestFuncs;
+
+class BlendA64Mask1DTest8B : public BlendA64Mask1DTest<F8B, uint8_t> {
+ protected:
+  void Execute(const uint8_t *p_src0, const uint8_t *p_src1) override {
+    params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
+                     src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_,
+                     w_, h_);
+    API_REGISTER_STATE_CHECK(params_.tst_func(
+        dst_tst_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
+        src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, w_, h_));
+  }
+};
+
+// Random pixels and random mask values, refreshed for every block size.
+TEST_P(BlendA64Mask1DTest8B, RandomValues) {
+  // Stop at the first mismatching block size instead of re-running (and
+  // re-failing) every remaining one.
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand8();
+      src1_[i] = rng_.Rand8();
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    Common(bsize);
+  }
+}
+
+// Pixels restricted to {254, 255} and mask values restricted to the top two
+// alpha values.
+TEST_P(BlendA64Mask1DTest8B, ExtremeValues) {
+  for (int i = 0; i < kBufSize; ++i) {
+    dst_ref_[i] = rng_(2) + 254;
+    dst_tst_[i] = rng_(2) + 254;
+    src0_[i] = rng_(2) + 254;
+    src1_[i] = rng_(2) + 254;
+  }
+
+  for (int i = 0; i < kMaxMaskSize; ++i)
+    mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
+    Common(bsize);
+  }
+}
+
+// Reference for the horizontal 1-D mask: replicates mask[col] down every row
+// of a 2-D mask and calls the 2-D C blend without subsampling.
+static void blend_a64_hmask_ref(uint8_t *dst, uint32_t dst_stride,
+                                const uint8_t *src0, uint32_t src0_stride,
+                                const uint8_t *src1, uint32_t src1_stride,
+                                const uint8_t *mask, int w, int h) {
+  uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize]
+                [BlendA64Mask1DTest8B::kMaxMaskSize];
+
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[col];
+
+  aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
+                       &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, w, h,
+                       0, 0);
+}
+
+// Reference for the vertical 1-D mask: replicates mask[row] across every
+// column of a 2-D mask and calls the 2-D C blend without subsampling.
+static void blend_a64_vmask_ref(uint8_t *dst, uint32_t dst_stride,
+                                const uint8_t *src0, uint32_t src0_stride,
+                                const uint8_t *src1, uint32_t src1_stride,
+                                const uint8_t *mask, int w, int h) {
+  uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize]
+                [BlendA64Mask1DTest8B::kMaxMaskSize];
+
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[row];
+
+  aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
+                       &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, w, h,
+                       0, 0);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    C, BlendA64Mask1DTest8B,
+    ::testing::Values(TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_c),
+                      TestFuncs(blend_a64_vmask_ref, aom_blend_a64_vmask_c)));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BlendA64Mask1DTest8B,
+    ::testing::Values(
+        TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_sse4_1),
+        TestFuncs(blend_a64_vmask_ref, aom_blend_a64_vmask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, BlendA64Mask1DTest8B,
+    ::testing::Values(TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_neon),
+                      TestFuncs(blend_a64_vmask_ref,
+                                aom_blend_a64_vmask_neon)));
+#endif  // HAVE_NEON
+
+//////////////////////////////////////////////////////////////////////////////
+// High bit-depth version
+//////////////////////////////////////////////////////////////////////////////
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                     uint32_t src0_stride, const uint8_t *src1,
+                     uint32_t src1_stride, const uint8_t *mask, int w, int h,
+                     int bd);
+typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
+
+class BlendA64Mask1DTestHBD : public BlendA64Mask1DTest<FHBD, uint16_t> {
+ protected:
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1) override {
+    params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
+                     CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+                     CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
+                     mask_, w_, h_, bit_depth_);
+    API_REGISTER_STATE_CHECK(params_.tst_func(
+        CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
+        CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+        CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, mask_, w_, h_,
+        bit_depth_));
+  }
+
+  int bit_depth_;
+};
+
+// Random pixels below 1 << bit_depth_ for bit depths 8, 10 and 12.
+TEST_P(BlendA64Mask1DTestHBD, RandomValues) {
+  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
+       bit_depth_ += 2) {
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
+         ++bsize) {
+      const int hi = 1 << bit_depth_;
+
+      for (int i = 0; i < kBufSize; ++i) {
+        dst_ref_[i] = rng_(hi);
+        dst_tst_[i] = rng_(hi);
+        src0_[i] = rng_(hi);
+        src1_[i] = rng_(hi);
+      }
+
+      for (int i = 0; i < kMaxMaskSize; ++i)
+        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+      Common(bsize);
+    }
+  }
+}
+
+// Pixels restricted to the top two values of each bit depth and mask values
+// restricted to the top two alpha values.
+TEST_P(BlendA64Mask1DTestHBD, ExtremeValues) {
+  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
+       bit_depth_ += 2) {
+    const int hi = 1 << bit_depth_;
+    const int lo = hi - 2;
+
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_(hi - lo) + lo;
+      dst_tst_[i] = rng_(hi - lo) + lo;
+      src0_[i] = rng_(hi - lo) + lo;
+      src1_[i] = rng_(hi - lo) + lo;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
+         ++bsize) {
+      Common(bsize);
+    }
+  }
+}
+
+// High bit-depth counterpart of blend_a64_hmask_ref.
+static void highbd_blend_a64_hmask_ref(
+    uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+    uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
+    const uint8_t *mask, int w, int h, int bd) {
+  uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize]
+                [BlendA64Mask1DTestHBD::kMaxMaskSize];
+
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[col];
+
+  aom_highbd_blend_a64_mask_c(
+      dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0],
+      BlendA64Mask1DTestHBD::kMaxMaskSize, w, h, 0, 0, bd);
+}
+
+// High bit-depth counterpart of blend_a64_vmask_ref.
+static void highbd_blend_a64_vmask_ref(
+    uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+    uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
+    const uint8_t *mask, int w, int h, int bd) {
+  uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize]
+                [BlendA64Mask1DTestHBD::kMaxMaskSize];
+
+  for (int row = 0; row < h; ++row)
+    for (int col = 0; col < w; ++col) mask2d[row][col] = mask[row];
+
+  aom_highbd_blend_a64_mask_c(
+      dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0],
+      BlendA64Mask1DTestHBD::kMaxMaskSize, w, h, 0, 0, bd);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    C, BlendA64Mask1DTestHBD,
+    ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref,
+                                   aom_highbd_blend_a64_hmask_c),
+                      TestFuncsHBD(highbd_blend_a64_vmask_ref,
+                                   aom_highbd_blend_a64_vmask_c)));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BlendA64Mask1DTestHBD,
+    ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref,
+                                   aom_highbd_blend_a64_hmask_sse4_1),
+                      TestFuncsHBD(highbd_blend_a64_vmask_ref,
+                                   aom_highbd_blend_a64_vmask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, BlendA64Mask1DTestHBD,
+    ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref,
+                                   aom_highbd_blend_a64_hmask_neon),
+                      TestFuncsHBD(highbd_blend_a64_vmask_ref,
+                                   aom_highbd_blend_a64_vmask_neon)));
+#endif  // HAVE_NEON
+
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/blend_a64_mask_test.cc
b/third_party/aom/test/blend_a64_mask_test.cc
new file mode 100644
index 0000000000..fafc7f0329
--- /dev/null
+++ b/third_party/aom/test/blend_a64_mask_test.cc
@@ -0,0 +1,649 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/register_state_check.h"
+#include "test/function_equivalence_test.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+
+#include "av1/common/enums.h"
+
+#include "aom_dsp/blend.h"
+
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+// Fixture that runs a reference and a tested 2-D mask blend function on the
+// same inputs and requires identical output. BlendA64Func is the blend
+// function pointer type; SrcPixel and DstPixel are the element types of the
+// source and destination buffers.
+template <typename BlendA64Func, typename SrcPixel, typename DstPixel>
+class BlendA64MaskTest : public FunctionEquivalenceTest<BlendA64Func> {
+ protected:
+  static const int kIterations = 10000;
+  static const int kMaxWidth = MAX_SB_SIZE * 5;  // * 5 to cover longer strides
+  static const int kMaxHeight = MAX_SB_SIZE;
+  static const int kBufSize = kMaxWidth * kMaxHeight;
+  static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
+  static const int kMaxMaskSize = kMaxMaskWidth * kMaxMaskWidth;
+
+  ~BlendA64MaskTest() override = default;
+
+  // Calls the reference function into dst_ref_ and the tested function into
+  // dst_tst_, run_times times each, using the state set by RunOneTest().
+  virtual void Execute(const SrcPixel *p_src0, const SrcPixel *p_src1,
+                       int run_times) = 0;
+
+  // Selected when source and destination share a pixel type: for a single
+  // run, randomly aliases one source with the tested destination. Timed runs
+  // (run_times > 1) always use separate sources.
+  template <typename Pixel>
+  void GetSources(Pixel **src0, Pixel **src1, Pixel * /*dst*/, int run_times) {
+    if (run_times > 1) {
+      *src0 = src0_;
+      *src1 = src1_;
+      return;
+    }
+    switch (this->rng_(3)) {
+      case 0:  // Separate sources
+        *src0 = src0_;
+        *src1 = src1_;
+        break;
+      case 1:  // src0 == dst
+        *src0 = dst_tst_;
+        src0_stride_ = dst_stride_;
+        src0_offset_ = dst_offset_;
+        *src1 = src1_;
+        break;
+      case 2:  // src1 == dst
+        *src0 = src0_;
+        *src1 = dst_tst_;
+        src1_stride_ = dst_stride_;
+        src1_offset_ = dst_offset_;
+        break;
+      default: FAIL();
+    }
+  }
+
+  // Selected for 16-bit sources with an 8-bit destination, where the buffers
+  // cannot alias: always uses separate sources.
+  void GetSources(uint16_t **src0, uint16_t **src1, uint8_t * /*dst*/,
+                  int /*run_times*/) {
+    *src0 = src0_;
+    *src1 = src1_;
+  }
+
+  uint8_t Rand1() { return this->rng_.Rand8() & 1; }
+
+  // Runs one block size with the given subsampling flags and compares the
+  // w_ x h_ output region. A mismatch is a fatal failure, so callers should
+  // check HasFatalFailure() before running further configurations.
+  void RunOneTest(int block_size, int subx, int suby, int run_times) {
+    w_ = block_size_wide[block_size];
+    h_ = block_size_high[block_size];
+    run_times = run_times > 1 ? run_times / w_ : 1;
+    ASSERT_GT(run_times, 0);
+    subx_ = subx;
+    suby_ = suby;
+
+    dst_offset_ = this->rng_(33);
+    dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src0_offset_ = this->rng_(33);
+    src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    src1_offset_ = this->rng_(33);
+    src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
+
+    // NOTE(review): the Execute() overrides in this file pass kMaxMaskWidth as
+    // the mask stride, not mask_stride_.
+    mask_stride_ =
+        this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 2 : 1);
+
+    SrcPixel *p_src0;
+    SrcPixel *p_src1;
+
+    p_src0 = src0_;
+    p_src1 = src1_;
+
+    GetSources(&p_src0, &p_src1, &dst_ref_[0], run_times);
+
+    Execute(p_src0, p_src1, run_times);
+
+    for (int r = 0; r < h_; ++r) {
+      for (int c = 0; c < w_; ++c) {
+        ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
+                  dst_tst_[dst_offset_ + r * dst_stride_ + c])
+            << w_ << "x" << h_ << " subx " << subx_ << " suby " << suby_
+            << " r: " << r << " c: " << c;
+      }
+    }
+  }
+
+  // Runs RunOneTest() for all four subx/suby combinations.
+  void RunTest(int block_size, int run_times) {
+    for (subx_ = 0; subx_ <= 1; subx_++) {
+      for (suby_ = 0; suby_ <= 1; suby_++) {
+        RunOneTest(block_size, subx_, suby_, run_times);
+      }
+    }
+  }
+
+  DstPixel dst_ref_[kBufSize];
+  DstPixel dst_tst_[kBufSize];
+  uint32_t dst_stride_;
+  uint32_t dst_offset_;
+
+  SrcPixel src0_[kBufSize];
+  uint32_t src0_stride_;
+  uint32_t src0_offset_;
+
+  SrcPixel src1_[kBufSize];
+  uint32_t src1_stride_;
+  uint32_t src1_offset_;
+
+  uint8_t mask_[kMaxMaskSize];
+  size_t mask_stride_;
+
+  int w_;
+  int h_;
+
+  int suby_;
+  int subx_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                    uint32_t src0_stride, const uint8_t *src1,
+                    uint32_t src1_stride, const uint8_t *mask,
+                    uint32_t mask_stride, int w, int h, int subx, int suby);
+typedef libaom_test::FuncParam<F8B> TestFuncs;
+
+class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t, uint8_t> {
+ protected:
+  void Execute(const uint8_t *p_src0, const uint8_t *p_src1,
+               int run_times) override {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
+                       p_src0 + src0_offset_, src0_stride_,
+                       p_src1 + src1_offset_, src1_stride_, mask_,
+                       kMaxMaskWidth, w_, h_, subx_, suby_);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
+                       p_src0 + src0_offset_, src0_stride_,
+                       p_src1 + src1_offset_, src1_stride_, mask_,
+                       kMaxMaskWidth, w_, h_, subx_, suby_);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 1) {
+      // Elapsed times come from aom_usec_timer, so they are labelled "us".
+      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fus", w_, h_, subx_, suby_,
+             time1, time2);
+      printf("(%3.2f)\n", time1 / time2);
+    }
+  }
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B);
+
+TEST_P(BlendA64MaskTest8B, RandomValues) {
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand8();
+      src1_[i] = rng_.Rand8();
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    RunTest(bsize, 1);
+  }
+}
+
+TEST_P(BlendA64MaskTest8B, ExtremeValues) {
+  for (int i = 0; i < kBufSize; ++i) {
+    dst_ref_[i] = rng_(2) + 254;
+    dst_tst_[i] = rng_(2) + 254;
+    src0_[i] = rng_(2) + 254;
+    src1_[i] = rng_(2) + 254;
+  }
+
+  for (int i = 0; i < kMaxMaskSize; ++i)
+    mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize)
+    RunTest(bsize, 1);
+}
+
+TEST_P(BlendA64MaskTest8B, DISABLED_Speed) {
+  const int kRunTimes = 10000000;
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand8();
+      src1_[i] = rng_.Rand8();
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    RunTest(bsize, kRunTimes);
+  }
+}
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(SSE4_1, BlendA64MaskTest8B,
+                         ::testing::Values(TestFuncs(
+                             aom_blend_a64_mask_c, aom_blend_a64_mask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(AVX2, BlendA64MaskTest8B,
+                         ::testing::Values(TestFuncs(aom_blend_a64_mask_sse4_1,
+                                                     aom_blend_a64_mask_avx2)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, BlendA64MaskTest8B,
+                         ::testing::Values(TestFuncs(aom_blend_a64_mask_c,
+                                                     aom_blend_a64_mask_neon)));
+#endif  // HAVE_NEON
+
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit _d16 version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*F8B_D16)(uint8_t *dst, uint32_t dst_stride, const uint16_t *src0,
+                        uint32_t src0_stride, const uint16_t *src1,
+                        uint32_t src1_stride, const uint8_t *mask,
+                        uint32_t mask_stride, int w, int h, int subx, int suby,
+                        ConvolveParams *conv_params);
+typedef libaom_test::FuncParam<F8B_D16> TestFuncs_d16;
+
+class BlendA64MaskTest8B_d16
+    : public BlendA64MaskTest<F8B_D16, uint16_t, uint8_t> {
+ protected:
+  // max number of bits used by the source
+  static const int kSrcMaxBitsMask = 0x3fff;
+
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
+               int run_times) override {
+    ConvolveParams conv_params;
+    conv_params.round_0 = ROUND0_BITS;
+    conv_params.round_1 = COMPOUND_ROUND1_BITS;
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
+                       p_src0 + src0_offset_, src0_stride_,
+                       p_src1 + src1_offset_, src1_stride_, mask_,
+                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
+                       p_src0 + src0_offset_, src0_stride_,
+                       p_src1 + src1_offset_, src1_stride_, mask_,
+                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 1) {
+      // Elapsed times come from aom_usec_timer, so they are labelled "us".
+      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fus", w_, h_, subx_, suby_,
+             time1, time2);
+      printf("(%3.2f)\n", time1 / time2);
+    }
+  }
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B_d16);
+
+TEST_P(BlendA64MaskTest8B_d16, RandomValues) {
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand16() & kSrcMaxBitsMask;
+      src1_[i] = rng_.Rand16() & kSrcMaxBitsMask;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    RunTest(bsize, 1);
+  }
+}
+
+TEST_P(BlendA64MaskTest8B_d16, ExtremeValues) {
+  for (int i = 0; i < kBufSize; ++i) {
+    dst_ref_[i] = 255;
+    dst_tst_[i] = 255;
+
+    src0_[i] = kSrcMaxBitsMask;
+    src1_[i] = kSrcMaxBitsMask;
+  }
+
+  for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA - 1;
+
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize)
+    RunTest(bsize, 1);
+}
+
+TEST_P(BlendA64MaskTest8B_d16, DISABLED_Speed) {
+  const int kRunTimes = 10000000;
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand16() & kSrcMaxBitsMask;
+      src1_[i] = rng_.Rand16() & kSrcMaxBitsMask;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    RunTest(bsize, kRunTimes);
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BlendA64MaskTest8B_d16,
+    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
+                                    aom_lowbd_blend_a64_d16_mask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, BlendA64MaskTest8B_d16,
+    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
+                                    aom_lowbd_blend_a64_d16_mask_avx2)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, BlendA64MaskTest8B_d16,
+    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
+                                    aom_lowbd_blend_a64_d16_mask_neon)));
+#endif  // HAVE_NEON
+
+//////////////////////////////////////////////////////////////////////////////
+// High bit-depth version
+//////////////////////////////////////////////////////////////////////////////
+#if CONFIG_AV1_HIGHBITDEPTH
+typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
+                     uint32_t src0_stride, const uint8_t *src1,
+                     uint32_t src1_stride, const uint8_t *mask,
+                     uint32_t mask_stride, int w, int h, int subx, int suby,
+                     int bd);
+typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
+
+class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t, uint16_t> {
+ protected:
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
+               int run_times) override {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
+                       CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+                       CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
+                       mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
+                       CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
+                       CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
+                       mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 1) {
+      // Elapsed times come from aom_usec_timer, so they are labelled "us".
+      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fus", w_, h_, subx_, suby_,
+             time1, time2);
+      printf("(%3.2f)\n", time1 / time2);
+    }
+  }
+
+  int bit_depth_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTestHBD);
+
+TEST_P(BlendA64MaskTestHBD, RandomValues) {
+  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
+       bit_depth_ += 2) {
+    const int hi = 1 << bit_depth_;
+
+    // Stop at the first mismatching block size, as ExtremeValues does.
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
+         ++bsize) {
+      for (int i = 0; i < kBufSize; ++i) {
+        dst_ref_[i] = rng_(hi);
+        dst_tst_[i] = rng_(hi);
+        src0_[i] = rng_(hi);
+        src1_[i] = rng_(hi);
+      }
+
+      for (int i = 0; i < kMaxMaskSize; ++i)
+        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+      RunTest(bsize, 1);
+    }
+  }
+}
+
+TEST_P(BlendA64MaskTestHBD, ExtremeValues) {
+  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
+       bit_depth_ += 2) {
+    const int hi = 1 << bit_depth_;
+    const int lo = hi - 2;
+
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
+         ++bsize) {
+      for (int i = 0; i < kBufSize; ++i) {
+        dst_ref_[i] = rng_(hi - lo) + lo;
+        dst_tst_[i] = rng_(hi - lo) + lo;
+        src0_[i] = rng_(hi - lo) + lo;
+        src1_[i] = rng_(hi - lo) + lo;
+      }
+
+      for (int i = 0; i < kMaxMaskSize; ++i)
+        mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
+
+      RunTest(bsize, 1);
+    }
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BlendA64MaskTestHBD,
+    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
+                                   aom_highbd_blend_a64_mask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, BlendA64MaskTestHBD,
+    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
+                                   aom_highbd_blend_a64_mask_neon)));
+#endif  // HAVE_NEON
+
+//////////////////////////////////////////////////////////////////////////////
+// HBD _d16 version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*FHBD_D16)(uint8_t *dst, uint32_t dst_stride,
+                         const CONV_BUF_TYPE *src0, uint32_t src0_stride,
+                         const CONV_BUF_TYPE *src1, uint32_t src1_stride,
+                         const uint8_t *mask, uint32_t mask_stride, int w,
+                         int h, int subx, int suby, ConvolveParams *conv_params,
+                         const int bd);
+typedef libaom_test::FuncParam<FHBD_D16> TestFuncsHBD_d16;
+
+class BlendA64MaskTestHBD_d16
+    : public BlendA64MaskTest<FHBD_D16, uint16_t, uint16_t> {
+ protected:
+  // max number of bits used by the source
+  static const int kSrcMaxBitsMask = (1 << 14) - 1;
+  static const int kSrcMaxBitsMaskHBD = (1 << 16) - 1;
+
+  // The tested function is only run (and timed) when tst_func is non-null.
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
+               int run_times) override {
+    ASSERT_GT(run_times, 0) << "Cannot run 0 iterations of the test.";
+    ConvolveParams conv_params;
+    conv_params.round_0 = (bit_depth_ == 12) ? ROUND0_BITS + 2 : ROUND0_BITS;
+    conv_params.round_1 = COMPOUND_ROUND1_BITS;
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
+                       p_src0 + src0_offset_, src0_stride_,
+                       p_src1 + src1_offset_, src1_stride_, mask_,
+                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
+                       bit_depth_);
+    }
+    if (params_.tst_func) {
+      aom_usec_timer_mark(&timer);
+      const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+      aom_usec_timer_start(&timer);
+      for (int i = 0; i < run_times; ++i) {
+        params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_),
+                         dst_stride_, p_src0 + src0_offset_, src0_stride_,
+                         p_src1 + src1_offset_, src1_stride_, mask_,
+                         kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
+                         bit_depth_);
+      }
+      aom_usec_timer_mark(&timer);
+      const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+      if (run_times > 1) {
+        // Elapsed times come from aom_usec_timer, so they are labelled "us".
+        printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fus", w_, h_, subx_, suby_,
+               time1, time2);
+        printf("(%3.2f)\n", time1 / time2);
+      }
+    }
+  }
+
+  int bit_depth_;
+  int src_max_bits_mask_;
+};
+
+TEST_P(BlendA64MaskTestHBD_d16, RandomValues) {
+  if (params_.tst_func == nullptr) return;
+  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
+       bit_depth_ += 2) {
+    src_max_bits_mask_ =
+        (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;
+
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
+         ++bsize) {
+      for (int i = 0; i < kBufSize; ++i) {
+        dst_ref_[i] = rng_.Rand8();
+        dst_tst_[i] = rng_.Rand8();
+
+        src0_[i] = rng_.Rand16() & src_max_bits_mask_;
+        src1_[i] = rng_.Rand16() & src_max_bits_mask_;
+      }
+
+      for (int i = 0; i < kMaxMaskSize; ++i)
+        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+      RunTest(bsize, 1);
+    }
+  }
+}
+
+TEST_P(BlendA64MaskTestHBD_d16, ExtremeValues) {
+  // Stop at the first mismatch, as the other tests in this file do.
+  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
+       bit_depth_ += 2) {
+    src_max_bits_mask_ =
+        (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;
+
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = 0;
+      dst_tst_[i] = (1 << bit_depth_) - 1;
+
+      src0_[i] = src_max_bits_mask_;
+      src1_[i] = src_max_bits_mask_;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA;
+    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
+         ++bsize) {
+      RunTest(bsize, 1);
+    }
+  }
+}
+
+TEST_P(BlendA64MaskTestHBD_d16, DISABLED_Speed) {
+  const int kRunTimes = 10000000;
+  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
+    for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
+      for (int i = 0; i < kBufSize; ++i) {
+        dst_ref_[i] = rng_.Rand12() % (1 << bit_depth_);
+        dst_tst_[i] = rng_.Rand12() % (1 << bit_depth_);
+
+        src0_[i] = rng_.Rand16();
+        src1_[i] = rng_.Rand16();
+      }
+
+      for (int i = 0; i < kMaxMaskSize; ++i)
+        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+      RunTest(bsize, kRunTimes);
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    C, BlendA64MaskTestHBD_d16,
+    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
+                                       aom_highbd_blend_a64_d16_mask_c)));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, BlendA64MaskTestHBD_d16,
+    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
+                                       aom_highbd_blend_a64_d16_mask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, BlendA64MaskTestHBD_d16,
+    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
+                                       aom_highbd_blend_a64_d16_mask_avx2)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, BlendA64MaskTestHBD_d16,
+    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
+                                       aom_highbd_blend_a64_d16_mask_neon)));
+#endif  // HAVE_NEON
+
+// TODO(slavarnway): Enable the following in the avx2 commit. (56501)
+// The instantiation prefix is AVX2 so that enabling this block does not clash
+// with the SSE4_1 instantiation of BlendA64MaskTestHBD above.
+#if 0
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, BlendA64MaskTestHBD,
+    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
+                                   aom_highbd_blend_a64_mask_avx2)));
+#endif  // HAVE_AVX2
+#endif
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+}  // namespace
diff --git a/third_party/aom/test/block_test.cc b/third_party/aom/test/block_test.cc
new file mode 100644
index 0000000000..686180cf87
--- /dev/null
+++ b/third_party/aom/test/block_test.cc
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "aom/aom_codec.h"
+#include "av1/common/blockd.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/y4m_video_source.h"
+#include "test/util.h"
+
+// Verify the optimized implementation of get_partition_subsize() produces the
+// same results as the Partition_Subsize lookup table in the spec.
+TEST(BlockdTest, GetPartitionSubsize) {
+  // The Partition_Subsize table in the spec (Section 9.3.
Conversion tables). + /* clang-format off */ + static const BLOCK_SIZE kPartitionSubsize[10][BLOCK_SIZES_ALL] = { + { + BLOCK_4X4, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X128, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID + }, { + BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID + }, { + BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID + }, { + BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID + }, { + BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID + }, { + BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16, + BLOCK_INVALID, 
BLOCK_INVALID, BLOCK_64X32, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID + }, { + BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID + }, { + BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID + }, { + BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X4, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X8, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID + }, { + BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X16, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X32, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X64, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID + } + }; + /* clang-format on */ + + for (int partition = 0; partition < 10; partition++) { + for (int bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) { + EXPECT_EQ(kPartitionSubsize[partition][bsize], + get_partition_subsize(static_cast<BLOCK_SIZE>(bsize), + static_cast<PARTITION_TYPE>(partition))); + } + } +} + +#if CONFIG_AV1_DECODER && CONFIG_AV1_ENCODER +namespace { +// This class is used to validate if sb_size configured is respected +// in 
the bitstream +class SuperBlockSizeTestLarge + : public ::libaom_test::CodecTestWith3Params< + libaom_test::TestMode, aom_superblock_size_t, aom_rc_mode>, + public ::libaom_test::EncoderTest { + protected: + SuperBlockSizeTestLarge() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + superblock_size_(GET_PARAM(2)), rc_end_usage_(GET_PARAM(3)) { + sb_size_violated_ = false; + } + ~SuperBlockSizeTestLarge() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = rc_end_usage_; + cfg_.g_threads = 1; + cfg_.g_lag_in_frames = 35; + cfg_.rc_target_bitrate = 1000; + } + + bool DoDecode() const override { return true; } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 5); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AV1E_SET_SUPERBLOCK_SIZE, superblock_size_); + } + } + + bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) override { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + if (AOM_CODEC_OK == res_dec && + superblock_size_ != AOM_SUPERBLOCK_SIZE_DYNAMIC) { + aom_codec_ctx_t *ctx_dec = decoder->GetDecoder(); + aom_superblock_size_t sb_size; + AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_SB_SIZE, &sb_size); + if (superblock_size_ != sb_size) { + sb_size_violated_ = true; + } + } + return AOM_CODEC_OK == res_dec; + } + + ::libaom_test::TestMode encoding_mode_; + aom_superblock_size_t superblock_size_; + bool sb_size_violated_; + aom_rc_mode rc_end_usage_; +}; + +TEST_P(SuperBlockSizeTestLarge, SuperBlockSizeTest) { + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 1); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(sb_size_violated_, false) + << "Failed for SB size " << superblock_size_; +} + +const 
::libaom_test::TestMode kTestModes[] = { +#if CONFIG_REALTIME_ONLY + ::libaom_test::kRealTime +#else + ::libaom_test::kRealTime, ::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood +#endif +}; + +AV1_INSTANTIATE_TEST_SUITE(SuperBlockSizeTestLarge, + ::testing::ValuesIn(kTestModes), + ::testing::Values(AOM_SUPERBLOCK_SIZE_64X64, + AOM_SUPERBLOCK_SIZE_128X128), + ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ)); +} // namespace +#endif diff --git a/third_party/aom/test/boolcoder_test.cc b/third_party/aom/test/boolcoder_test.cc new file mode 100644 index 0000000000..52c58e0b2e --- /dev/null +++ b/third_party/aom/test/boolcoder_test.cc @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "aom/aom_integer.h" +#include "aom_dsp/bitreader.h" +#include "aom_dsp/bitwriter.h" + +using libaom_test::ACMRandom; + +namespace { +const int num_tests = 10; +} // namespace + +TEST(AV1, TestBitIO) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int n = 0; n < num_tests; ++n) { + for (int method = 0; method <= 7; ++method) { // we generate various proba + const int kBitsToTest = 1000; + uint8_t probas[kBitsToTest]; + + for (int i = 0; i < kBitsToTest; ++i) { + const int parity = i & 1; + /* clang-format off */ + probas[i] = + (method == 0) ? 0 : (method == 1) ? 255 : + (method == 2) ? 
128 : + (method == 3) ? rnd.Rand8() : + (method == 4) ? (parity ? 0 : 255) : + // alternate between low and high proba: + (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) : + (method == 6) ? + (parity ? rnd(64) : 255 - rnd(64)) : + (parity ? rnd(32) : 255 - rnd(32)); + /* clang-format on */ + } + for (int bit_method = 0; bit_method <= 3; ++bit_method) { + const int random_seed = 6432; + const int kBufferSize = 10000; + ACMRandom bit_rnd(random_seed); + aom_writer bw; + uint8_t bw_buffer[kBufferSize]; + aom_start_encode(&bw, bw_buffer); + + int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0; + for (int i = 0; i < kBitsToTest; ++i) { + if (bit_method == 2) { + bit = (i & 1); + } else if (bit_method == 3) { + bit = bit_rnd(2); + } + aom_write(&bw, bit, static_cast<int>(probas[i])); + } + + GTEST_ASSERT_GE(aom_stop_encode(&bw), 0); + + aom_reader br; + aom_reader_init(&br, bw_buffer, bw.pos); + bit_rnd.Reset(random_seed); + for (int i = 0; i < kBitsToTest; ++i) { + if (bit_method == 2) { + bit = (i & 1); + } else if (bit_method == 3) { + bit = bit_rnd(2); + } + GTEST_ASSERT_EQ(aom_read(&br, probas[i], nullptr), bit) + << "pos: " << i << " / " << kBitsToTest + << " bit_method: " << bit_method << " method: " << method; + } + } + } + } +} + +#define FRAC_DIFF_TOTAL_ERROR 0.18 + +TEST(AV1, TestTell) { + const int kBufferSize = 10000; + aom_writer bw; + uint8_t bw_buffer[kBufferSize]; + const int kSymbols = 1024; + // Coders are noisier at low probabilities, so we start at p = 4. 
+ for (int p = 4; p < 256; p++) { + double probability = p / 256.; + aom_start_encode(&bw, bw_buffer); + for (int i = 0; i < kSymbols; i++) { + aom_write(&bw, 0, p); + } + GTEST_ASSERT_GE(aom_stop_encode(&bw), 0); + aom_reader br; + aom_reader_init(&br, bw_buffer, bw.pos); + uint32_t last_tell = aom_reader_tell(&br); + uint32_t last_tell_frac = aom_reader_tell_frac(&br); + double frac_diff_total = 0; + GTEST_ASSERT_GE(aom_reader_tell(&br), 0u); + GTEST_ASSERT_LE(aom_reader_tell(&br), 1u); + ASSERT_FALSE(aom_reader_has_overflowed(&br)); + for (int i = 0; i < kSymbols; i++) { + aom_read(&br, p, nullptr); + uint32_t tell = aom_reader_tell(&br); + uint32_t tell_frac = aom_reader_tell_frac(&br); + GTEST_ASSERT_GE(tell, last_tell) + << "tell: " << tell << ", last_tell: " << last_tell; + GTEST_ASSERT_GE(tell_frac, last_tell_frac) + << "tell_frac: " << tell_frac + << ", last_tell_frac: " << last_tell_frac; + // Frac tell should round up to tell. + GTEST_ASSERT_EQ(tell, (tell_frac + 7) >> 3); + last_tell = tell; + frac_diff_total += + fabs(((tell_frac - last_tell_frac) / 8.0) + log2(probability)); + last_tell_frac = tell_frac; + } + const uint32_t expected = (uint32_t)(-kSymbols * log2(probability)); + // Last tell should be close to the expected value. + GTEST_ASSERT_LE(last_tell, expected + 20) << " last_tell: " << last_tell; + // The average frac_diff error should be pretty small. + GTEST_ASSERT_LE(frac_diff_total / kSymbols, FRAC_DIFF_TOTAL_ERROR) + << " frac_diff_total: " << frac_diff_total; + ASSERT_FALSE(aom_reader_has_overflowed(&br)); + } +} + +TEST(AV1, TestHasOverflowed) { + const int kBufferSize = 10000; + aom_writer bw; + uint8_t bw_buffer[kBufferSize]; + const int kSymbols = 1024; + // Coders are noisier at low probabilities, so we start at p = 4. 
+ for (int p = 4; p < 256; p++) { + aom_start_encode(&bw, bw_buffer); + for (int i = 0; i < kSymbols; i++) { + aom_write(&bw, 1, p); + } + GTEST_ASSERT_GE(aom_stop_encode(&bw), 0); + aom_reader br; + aom_reader_init(&br, bw_buffer, bw.pos); + ASSERT_FALSE(aom_reader_has_overflowed(&br)); + for (int i = 0; i < kSymbols; i++) { + GTEST_ASSERT_EQ(aom_read(&br, p, nullptr), 1); + ASSERT_FALSE(aom_reader_has_overflowed(&br)); + } + // In the worst case, the encoder uses just a tiny fraction of the last + // byte in the buffer. So to guarantee that aom_reader_has_overflowed() + // returns true, we have to consume very nearly 8 additional bits of data. + // In the worse case, one of the bits in that byte will be 1, and the rest + // will be zero. Once we are past that 1 bit, when the probability of + // reading zero symbol from aom_read() is high, each additional symbol read + // will consume very little additional data (in the case that p == 255, + // approximately -log_2(255/256) ~= 0.0056 bits). In that case it would + // take around 178 calls to consume more than 8 bits. That is only an upper + // bound. In practice we are not guaranteed to hit the worse case and can + // get away with 174 calls. + for (int i = 0; i < 174; i++) { + aom_read(&br, p, nullptr); + } + ASSERT_TRUE(aom_reader_has_overflowed(&br)); + } +} diff --git a/third_party/aom/test/borders_test.cc b/third_party/aom/test/borders_test.cc new file mode 100644 index 0000000000..594c3e8429 --- /dev/null +++ b/third_party/aom/test/borders_test.cc @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <climits> +#include <vector> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +class BordersTestLarge + : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>, + public ::libaom_test::EncoderTest { + protected: + BordersTestLarge() : EncoderTest(GET_PARAM(0)) {} + ~BordersTestLarge() override = default; + + void SetUp() override { InitializeConfig(GET_PARAM(1)); } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 1); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + if (pkt->data.frame.flags & AOM_FRAME_IS_KEY) { + } + } +}; + +TEST_P(BordersTestLarge, TestEncodeHighBitrate) { + // Validate that this non multiple of 64 wide clip encodes and decodes + // without a mismatch when passing in a very low max q. This pushes + // the encoder to producing lots of big partitions which will likely + // extend into the border and test the border condition. + cfg_.g_lag_in_frames = 25; + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 2000; + cfg_.rc_max_quantizer = 10; + + ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 10); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} +TEST_P(BordersTestLarge, TestLowBitrate) { + // Validate that this clip encodes and decodes without a mismatch + // when passing in a very high min q. 
This pushes the encoder to producing + // lots of small partitions which might will test the other condition. + + cfg_.g_lag_in_frames = 25; + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 200; + cfg_.rc_min_quantizer = 40; + + ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 10); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +AV1_INSTANTIATE_TEST_SUITE(BordersTestLarge, + ::testing::Values(::libaom_test::kTwoPassGood)); +} // namespace diff --git a/third_party/aom/test/cdef_test.cc b/third_party/aom/test/cdef_test.cc new file mode 100644 index 0000000000..ad54407ca7 --- /dev/null +++ b/third_party/aom/test/cdef_test.cc @@ -0,0 +1,962 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <array> +#include <cstdlib> +#include <iostream> +#include <string> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/av1_rtcd.h" + +#include "aom_ports/aom_timer.h" +#include "av1/common/cdef_block.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +using libaom_test::ACMRandom; + +namespace { + +using CdefFilterBlockFunctions = std::array<cdef_filter_block_func, 4>; + +typedef std::tuple<CdefFilterBlockFunctions, CdefFilterBlockFunctions, + BLOCK_SIZE, int, int> + cdef_dir_param_t; + +class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> { + public: + ~CDEFBlockTest() override = default; + void SetUp() override { + cdef = GET_PARAM(0); + ref_cdef = GET_PARAM(1); + bsize = GET_PARAM(2); + boundary = GET_PARAM(3); + depth = GET_PARAM(4); + } + + protected: + BLOCK_SIZE bsize; + int boundary; + int depth; + CdefFilterBlockFunctions cdef; + CdefFilterBlockFunctions ref_cdef; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockTest); + +typedef CDEFBlockTest CDEFBlockHighbdTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockHighbdTest); + +typedef CDEFBlockTest CDEFSpeedTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedTest); + +typedef CDEFBlockTest CDEFSpeedHighbdTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedHighbdTest); + +int64_t test_cdef(BLOCK_SIZE bsize, int iterations, + CdefFilterBlockFunctions cdef, + CdefFilterBlockFunctions ref_cdef, int boundary, int depth) { + aom_usec_timer ref_timer; + int64_t ref_elapsed_time = 0; + const int size = 8; + const int ysize = size + 2 * CDEF_VBORDER; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]); + DECLARE_ALIGNED(16, static uint16_t, d[size * size]); + DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]); + memset(ref_d, 0, sizeof(ref_d)); 
+ memset(d, 0, sizeof(d)); + + int error = 0, pristrength = 0, secstrength, dir; + int pridamping, secdamping, bits, level, count, + errdepth = 0, errpristrength = 0, errsecstrength = 0, errboundary = 0, + errpridamping = 0, errsecdamping = 0; + unsigned int pos = 0; + + const int block_width = + ((bsize == BLOCK_8X8) || (bsize == BLOCK_8X4)) ? 8 : 4; + const int block_height = + ((bsize == BLOCK_8X8) || (bsize == BLOCK_4X8)) ? 8 : 4; + const unsigned int max_pos = size * size >> static_cast<int>(depth == 8); + for (pridamping = 3 + depth - 8; pridamping < 7 - 3 * !!boundary + depth - 8; + pridamping++) { + for (secdamping = 3 + depth - 8; + secdamping < 7 - 3 * !!boundary + depth - 8; secdamping++) { + for (count = 0; count < iterations; count++) { + for (level = 0; level < (1 << depth) && !error; + level += (2 + 6 * !!boundary) << (depth - 8)) { + for (bits = 1; bits <= depth && !error; bits += 1 + 3 * !!boundary) { + for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++) + s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0, + (1 << depth) - 1); + if (boundary) { + if (boundary & 1) { // Left + for (int i = 0; i < ysize; i++) + for (int j = 0; j < CDEF_HBORDER; j++) + s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE; + } + if (boundary & 2) { // Right + for (int i = 0; i < ysize; i++) + for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++) + s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE; + } + if (boundary & 4) { // Above + for (int i = 0; i < CDEF_VBORDER; i++) + for (int j = 0; j < CDEF_BSTRIDE; j++) + s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE; + } + if (boundary & 8) { // Below + for (int i = CDEF_VBORDER + size; i < ysize; i++) + for (int j = 0; j < CDEF_BSTRIDE; j++) + s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE; + } + } + for (dir = 0; dir < 8; dir++) { + for (pristrength = 0; pristrength <= 19 << (depth - 8) && !error; + pristrength += (1 + 4 * !!boundary) << (depth - 8)) { + if (pristrength == 16) pristrength = 19; + for (secstrength = 0; secstrength <= 
4 << (depth - 8) && !error; + secstrength += 1 << (depth - 8)) { + if (secstrength == 3 << (depth - 8)) continue; + + const int strength_index = + (secstrength == 0) | ((pristrength == 0) << 1); + + aom_usec_timer_start(&ref_timer); + ref_cdef[strength_index]( + ref_d, size, + s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE, + pristrength, secstrength, dir, pridamping, secdamping, + depth - 8, block_width, block_height); + aom_usec_timer_mark(&ref_timer); + ref_elapsed_time += aom_usec_timer_elapsed(&ref_timer); + // If cdef and ref_cdef are the same, we're just testing + // speed + if (cdef[0] != ref_cdef[0]) + API_REGISTER_STATE_CHECK(cdef[strength_index]( + d, size, s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE, + pristrength, secstrength, dir, pridamping, secdamping, + depth - 8, block_width, block_height)); + if (ref_cdef[0] != cdef[0]) { + for (pos = 0; pos < max_pos && !error; pos++) { + error = ref_d[pos] != d[pos]; + errdepth = depth; + errpristrength = pristrength; + errsecstrength = secstrength; + errboundary = boundary; + errpridamping = pridamping; + errsecdamping = secdamping; + } + } + } + } + } + } + } + } + } + } + + pos--; + EXPECT_EQ(0, error) << "Error: CDEFBlockTest, SIMD and C mismatch." 
+ << std::endl + << "First error at " << pos % size << "," << pos / size + << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos] + << ") " << std::endl + << "pristrength: " << errpristrength << std::endl + << "pridamping: " << errpridamping << std::endl + << "secstrength: " << errsecstrength << std::endl + << "secdamping: " << errsecdamping << std::endl + << "depth: " << errdepth << std::endl + << "size: " << bsize << std::endl + << "boundary: " << errboundary << std::endl + << std::endl; + + return ref_elapsed_time; +} + +void test_cdef_speed(BLOCK_SIZE bsize, int iterations, + CdefFilterBlockFunctions cdef, + CdefFilterBlockFunctions ref_cdef, int boundary, + int depth) { + int64_t ref_elapsed_time = + test_cdef(bsize, iterations, ref_cdef, ref_cdef, boundary, depth); + + int64_t elapsed_time = + test_cdef(bsize, iterations, cdef, cdef, boundary, depth); + + std::cout << "C time: " << ref_elapsed_time << " us" << std::endl + << "SIMD time: " << elapsed_time << " us" << std::endl; + + EXPECT_GT(ref_elapsed_time, elapsed_time) + << "Error: CDEFSpeedTest, SIMD slower than C." 
<< std::endl + << "C time: " << ref_elapsed_time << " us" << std::endl + << "SIMD time: " << elapsed_time << " us" << std::endl; +} + +typedef int (*find_dir_t)(const uint16_t *img, int stride, int32_t *var, + int coeff_shift); + +typedef std::tuple<find_dir_t, find_dir_t> find_dir_param_t; + +class CDEFFindDirTest : public ::testing::TestWithParam<find_dir_param_t> { + public: + ~CDEFFindDirTest() override = default; + void SetUp() override { + finddir = GET_PARAM(0); + ref_finddir = GET_PARAM(1); + } + + protected: + find_dir_t finddir; + find_dir_t ref_finddir; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirTest); + +typedef CDEFFindDirTest CDEFFindDirSpeedTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirSpeedTest); + +void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var, + int coeff_shift), + int (*ref_finddir)(const uint16_t *img, int stride, + int32_t *var, int coeff_shift)) { + const int size = 8; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, s[size * size]); + + int error = 0; + int depth, bits, level, count, errdepth = 0; + int ref_res = 0, res = 0; + int32_t ref_var = 0, var = 0; + + for (depth = 8; depth <= 12 && !error; depth += 2) { + for (count = 0; count < 512 && !error; count++) { + for (level = 0; level < (1 << depth) && !error; + level += 1 << (depth - 8)) { + for (bits = 1; bits <= depth && !error; bits++) { + for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++) + s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0, + (1 << depth) - 1); + for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++) + ref_res = ref_finddir(s, size, &ref_var, depth - 8); + if (finddir != ref_finddir) + API_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8)); + if (ref_finddir != finddir) { + if (res != ref_res || var != ref_var) error = 1; + errdepth = depth; + } + } + } + } + } + + EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch." 
+ << std::endl + << "return: " << res << " : " << ref_res << std::endl + << "var: " << var << " : " << ref_var << std::endl + << "depth: " << errdepth << std::endl + << std::endl; +} + +void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride, + int32_t *var, int coeff_shift), + int (*ref_finddir)(const uint16_t *img, int stride, + int32_t *var, int coeff_shift)) { + aom_usec_timer ref_timer; + aom_usec_timer timer; + + aom_usec_timer_start(&ref_timer); + test_finddir(ref_finddir, ref_finddir); + aom_usec_timer_mark(&ref_timer); + int64_t ref_elapsed_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer_start(&timer); + test_finddir(finddir, finddir); + aom_usec_timer_mark(&timer); + int64_t elapsed_time = aom_usec_timer_elapsed(&timer); + + EXPECT_GT(ref_elapsed_time, elapsed_time) + << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl + << "C time: " << ref_elapsed_time << " us" << std::endl + << "SIMD time: " << elapsed_time << " us" << std::endl; +} + +typedef void (*find_dir_dual_t)(const uint16_t *img1, const uint16_t *img2, + int stride, int32_t *var1, int32_t *var2, + int coeff_shift, int *out1, int *out2); + +typedef std::tuple<find_dir_dual_t, find_dir_dual_t> find_dir_dual_param_t; + +class CDEFFindDirDualTest + : public ::testing::TestWithParam<find_dir_dual_param_t> { + public: + ~CDEFFindDirDualTest() override = default; + void SetUp() override { + finddir = GET_PARAM(0); + ref_finddir = GET_PARAM(1); + } + + protected: + find_dir_dual_t finddir; + find_dir_dual_t ref_finddir; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirDualTest); + +typedef CDEFFindDirDualTest CDEFFindDirDualSpeedTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirDualSpeedTest); + +void test_finddir_dual( + void (*finddir)(const uint16_t *img1, const uint16_t *img2, int stride, + int32_t *var1, int32_t *var2, int coeff_shift, int *out1, + int *out2), + void (*ref_finddir)(const uint16_t *img1, const uint16_t *img2, 
int stride, + int32_t *var1, int32_t *var2, int coeff_shift, + int *out1, int *out2)) { + const int size_wd = 16; + const int size_ht = 8; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, s[size_ht * size_wd]); + + int error = 0, errdepth = 0; + int32_t ref_var[2] = { 0 }; + int ref_dir[2] = { 0 }; + int32_t var[2] = { 0 }; + int dir[2] = { 0 }; + + for (int depth = 8; depth <= 12 && !error; depth += 2) { + for (int count = 0; count < 512 && !error; count++) { + for (int level = 0; level < (1 << depth) && !error; + level += 1 << (depth - 8)) { + for (int bits = 1; bits <= depth && !error; bits++) { + for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++) + s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0, + (1 << depth) - 1); + for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++) + ref_finddir(s, s + 8, size_wd, &ref_var[0], &ref_var[1], depth - 8, + &ref_dir[0], &ref_dir[1]); + if (finddir != ref_finddir) + API_REGISTER_STATE_CHECK(finddir(s, s + 8, size_wd, &var[0], + &var[1], depth - 8, &dir[0], + &dir[1])); + if (ref_finddir != finddir) { + for (int j = 0; j < 2; j++) { + if (ref_dir[j] != dir[j] || ref_var[j] != var[j]) error = 1; + } + errdepth = depth; + } + } + } + } + } + + for (int j = 0; j < 2; j++) { + EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch." 
+ << std::endl + << "direction: " << dir[j] << " : " << ref_dir[j] + << std::endl + << "variance: " << var[j] << " : " << ref_var[j] + << std::endl + << "depth: " << errdepth << std::endl + << std::endl; + } +} + +void test_finddir_dual_speed( + void (*finddir)(const uint16_t *img1, const uint16_t *img2, int stride, + int32_t *var1, int32_t *var2, int coeff_shift, int *out1, + int *out2), + void (*ref_finddir)(const uint16_t *img1, const uint16_t *img2, int stride, + int32_t *var1, int32_t *var2, int coeff_shift, + int *out1, int *out2)) { + aom_usec_timer ref_timer; + aom_usec_timer timer; + + aom_usec_timer_start(&ref_timer); + test_finddir_dual(ref_finddir, ref_finddir); + aom_usec_timer_mark(&ref_timer); + const double ref_elapsed_time = + static_cast<double>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&timer); + test_finddir_dual(finddir, finddir); + aom_usec_timer_mark(&timer); + const double elapsed_time = + static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf( + "ref_time=%lf \t simd_time=%lf \t " + "gain=%lf \n", + ref_elapsed_time, elapsed_time, ref_elapsed_time / elapsed_time); +} + +#define MAX_CDEF_BLOCK 256 + +constexpr int kIterations = 100; + +using CDEFCopyRect8To16 = void (*)(uint16_t *dst, int dstride, + const uint8_t *src, int sstride, int width, + int height); + +using CDEFCopyRect8To16Param = std::tuple<CDEFCopyRect8To16, CDEFCopyRect8To16>; + +class CDEFCopyRect8to16Test + : public ::testing::TestWithParam<CDEFCopyRect8To16Param> { + public: + CDEFCopyRect8to16Test() + : rnd_(libaom_test::ACMRandom::DeterministicSeed()), + test_func_(GET_PARAM(0)), ref_func_(GET_PARAM(1)) {} + ~CDEFCopyRect8to16Test() override = default; + void SetUp() override { + src_ = reinterpret_cast<uint8_t *>( + aom_memalign(8, sizeof(uint8_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); + ASSERT_NE(src_, nullptr); + ref_dst_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); + 
ASSERT_NE(ref_dst_, nullptr); + test_dst_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); + ASSERT_NE(test_dst_, nullptr); + } + + void TearDown() override { + aom_free(src_); + aom_free(ref_dst_); + aom_free(test_dst_); + } + + void test_copy_rect_8_to_16(CDEFCopyRect8To16 test_func, + CDEFCopyRect8To16 ref_func) { + constexpr int stride = MAX_CDEF_BLOCK; + int error = 0; + for (int k = 0; k < kIterations && !error; k++) { + // This function operates on values of width that are either 4 or a + // multiple of 8. For height, generate a random value between 1 and 256, + // making sure it is even. + const int width = k == 0 ? 4 : (rnd_.Rand8() % 32 + 1) * 8; + const int height = k == 0 ? 4 : (rnd_.Rand8() % 128 + 1) * 2; + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + src_[i * stride + j] = rnd_.Rand8(); + } + } + + ref_func(ref_dst_, stride, src_, stride, width, height); + test_func(test_dst_, stride, src_, stride, width, height); + + int i, j; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + if (test_dst_[i * stride + j] != ref_dst_[i * stride + j]) { + error = 1; + break; + } + } + if (error) { + break; + } + } + EXPECT_EQ(0, error) + << "Error: CDEFCopyRect8to16Test, SIMD and C mismatch." 
<< std::endl + << "First error at " << i << "," << j << " (" + << ref_dst_[i * stride + j] << " : " << test_dst_[i * stride + j] + << ") " << std::endl + << "width: " << width << std::endl + << "height: " << height << std::endl + << std::endl; + } + } + + protected: + libaom_test::ACMRandom rnd_; + uint8_t *src_; + uint16_t *ref_dst_; + uint16_t *test_dst_; + CDEFCopyRect8To16 test_func_; + CDEFCopyRect8To16 ref_func_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFCopyRect8to16Test); + +using CDEFCopyRect16To16 = void (*)(uint16_t *dst, int dstride, + const uint16_t *src, int sstride, int width, + int height); + +using CDEFCopyRect16To16Param = + std::tuple<CDEFCopyRect16To16, CDEFCopyRect16To16>; + +class CDEFCopyRect16to16Test + : public ::testing::TestWithParam<CDEFCopyRect16To16Param> { + public: + CDEFCopyRect16to16Test() + : rnd_(libaom_test::ACMRandom::DeterministicSeed()), + test_func_(GET_PARAM(0)), ref_func_(GET_PARAM(1)) {} + ~CDEFCopyRect16to16Test() override = default; + void SetUp() override { + src_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); + ASSERT_NE(src_, nullptr); + ref_dst_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); + ASSERT_NE(ref_dst_, nullptr); + test_dst_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); + ASSERT_NE(test_dst_, nullptr); + } + + void TearDown() override { + aom_free(src_); + aom_free(ref_dst_); + aom_free(test_dst_); + } + + void test_copy_rect_16_to_16(CDEFCopyRect16To16 test_func, + CDEFCopyRect16To16 ref_func) { + constexpr int stride = MAX_CDEF_BLOCK; + int error = 0; + for (int k = 0; k < kIterations && !error; k++) { + // This function operates on values of width that are either 4 or a + // multiple of 8. For height, generate a random value between 1 and 256, + // making sure it is even. + const int width = k == 0 ? 
4 : (rnd_.Rand8() % 32 + 1) * 8; + const int height = k == 0 ? 4 : (rnd_.Rand8() % 128 + 1) * 2; + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + src_[i * stride + j] = rnd_.Rand16(); + } + } + + ref_func(ref_dst_, stride, src_, stride, width, height); + test_func(test_dst_, stride, src_, stride, width, height); + + int i, j; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + if (test_dst_[i * stride + j] != ref_dst_[i * stride + j]) { + error = 1; + break; + } + } + if (error) { + break; + } + } + EXPECT_EQ(0, error) + << "Error: CDEFCopyRect16to16Test, SIMD and C mismatch." << std::endl + << "First error at " << i << "," << j << " (" + << ref_dst_[i * stride + j] << " : " << test_dst_[i * stride + j] + << ") " << std::endl + << "width: " << width << std::endl + << "height: " << height << std::endl + << std::endl; + } + } + + protected: + libaom_test::ACMRandom rnd_; + uint16_t *src_; + uint16_t *ref_dst_; + uint16_t *test_dst_; + CDEFCopyRect16To16 test_func_; + CDEFCopyRect16To16 ref_func_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFCopyRect16to16Test); + +TEST_P(CDEFBlockTest, TestSIMDNoMismatch) { + test_cdef(bsize, 1, cdef, ref_cdef, boundary, depth); +} + +TEST_P(CDEFBlockHighbdTest, TestSIMDHighbdNoMismatch) { + test_cdef(bsize, 1, cdef, ref_cdef, boundary, depth); +} + +TEST_P(CDEFSpeedTest, DISABLED_TestSpeed) { + test_cdef_speed(bsize, 4, cdef, ref_cdef, boundary, depth); +} + +TEST_P(CDEFSpeedHighbdTest, DISABLED_TestSpeed) { + test_cdef_speed(bsize, 4, cdef, ref_cdef, boundary, depth); +} + +TEST_P(CDEFFindDirTest, TestSIMDNoMismatch) { + test_finddir(finddir, ref_finddir); +} + +TEST_P(CDEFFindDirSpeedTest, DISABLED_TestSpeed) { + test_finddir_speed(finddir, ref_finddir); +} + +TEST_P(CDEFFindDirDualTest, TestSIMDNoMismatch) { + test_finddir_dual(finddir, ref_finddir); +} + +TEST_P(CDEFFindDirDualSpeedTest, DISABLED_TestSpeed) { + test_finddir_dual_speed(finddir, ref_finddir); +} + 
+TEST_P(CDEFCopyRect8to16Test, TestSIMDNoMismatch) { + test_copy_rect_8_to_16(test_func_, ref_func_); +} + +TEST_P(CDEFCopyRect16to16Test, TestSIMDNoMismatch) { + test_copy_rect_16_to_16(test_func_, ref_func_); +} + +using std::make_tuple; + +#if (HAVE_SSE2 || HAVE_SSSE3 || HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON) +static const CdefFilterBlockFunctions kCdefFilterFuncC[] = { + { &cdef_filter_8_0_c, &cdef_filter_8_1_c, &cdef_filter_8_2_c, + &cdef_filter_8_3_c } +}; + +static const CdefFilterBlockFunctions kCdefFilterHighbdFuncC[] = { + { &cdef_filter_16_0_c, &cdef_filter_16_0_c, &cdef_filter_16_0_c, + &cdef_filter_16_0_c } +}; +#endif + +#if HAVE_SSE2 +static const CdefFilterBlockFunctions kCdefFilterFuncSse2[] = { + { &cdef_filter_8_0_sse2, &cdef_filter_8_1_sse2, &cdef_filter_8_2_sse2, + &cdef_filter_8_3_sse2 } +}; + +static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSse2[] = { + { &cdef_filter_16_0_sse2, &cdef_filter_16_1_sse2, &cdef_filter_16_2_sse2, + &cdef_filter_16_3_sse2 } +}; + +INSTANTIATE_TEST_SUITE_P( + SSE2, CDEFBlockTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse2), + ::testing::ValuesIn(kCdefFilterFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(8))); +INSTANTIATE_TEST_SUITE_P( + SSE2, CDEFBlockHighbdTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse2), + ::testing::ValuesIn(kCdefFilterHighbdFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Range(10, 13, 2))); +INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirTest, + ::testing::Values(make_tuple(&cdef_find_dir_sse2, + &cdef_find_dir_c))); +INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirDualTest, + ::testing::Values(make_tuple(&cdef_find_dir_dual_sse2, + &cdef_find_dir_dual_c))); + +INSTANTIATE_TEST_SUITE_P( + SSE2, CDEFCopyRect8to16Test, + ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c, + 
&cdef_copy_rect8_8bit_to_16bit_sse2))); + +INSTANTIATE_TEST_SUITE_P( + SSE2, CDEFCopyRect16to16Test, + ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c, + &cdef_copy_rect8_16bit_to_16bit_sse2))); +#endif + +#if HAVE_SSSE3 +static const CdefFilterBlockFunctions kCdefFilterFuncSsse3[] = { + { &cdef_filter_8_0_ssse3, &cdef_filter_8_1_ssse3, &cdef_filter_8_2_ssse3, + &cdef_filter_8_3_ssse3 } +}; + +static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSsse3[] = { + { &cdef_filter_16_0_ssse3, &cdef_filter_16_1_ssse3, &cdef_filter_16_2_ssse3, + &cdef_filter_16_3_ssse3 } +}; + +INSTANTIATE_TEST_SUITE_P( + SSSE3, CDEFBlockTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSsse3), + ::testing::ValuesIn(kCdefFilterFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(8))); +INSTANTIATE_TEST_SUITE_P( + SSSE3, CDEFBlockHighbdTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSsse3), + ::testing::ValuesIn(kCdefFilterHighbdFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Range(10, 13, 2))); +INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirTest, + ::testing::Values(make_tuple(&cdef_find_dir_ssse3, + &cdef_find_dir_c))); +INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirDualTest, + ::testing::Values(make_tuple(&cdef_find_dir_dual_ssse3, + &cdef_find_dir_dual_c))); + +INSTANTIATE_TEST_SUITE_P( + SSSE3, CDEFCopyRect8to16Test, + ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c, + &cdef_copy_rect8_8bit_to_16bit_ssse3))); + +INSTANTIATE_TEST_SUITE_P( + SSSE3, CDEFCopyRect16to16Test, + ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c, + &cdef_copy_rect8_16bit_to_16bit_ssse3))); +#endif + +#if HAVE_SSE4_1 +static const CdefFilterBlockFunctions kCdefFilterFuncSse4_1[] = { + { &cdef_filter_8_0_sse4_1, &cdef_filter_8_1_sse4_1, &cdef_filter_8_2_sse4_1, + &cdef_filter_8_3_sse4_1 } +}; + +static const 
CdefFilterBlockFunctions kCdefFilterHighbdFuncSse4_1[] = { + { &cdef_filter_16_0_sse4_1, &cdef_filter_16_1_sse4_1, + &cdef_filter_16_2_sse4_1, &cdef_filter_16_3_sse4_1 } +}; + +INSTANTIATE_TEST_SUITE_P( + SSE4_1, CDEFBlockTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse4_1), + ::testing::ValuesIn(kCdefFilterFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(8))); +INSTANTIATE_TEST_SUITE_P( + SSE4_1, CDEFBlockHighbdTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse4_1), + ::testing::ValuesIn(kCdefFilterHighbdFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Range(10, 13, 2))); +INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirTest, + ::testing::Values(make_tuple(&cdef_find_dir_sse4_1, + &cdef_find_dir_c))); +INSTANTIATE_TEST_SUITE_P( + SSE4_1, CDEFFindDirDualTest, + ::testing::Values(make_tuple(&cdef_find_dir_dual_sse4_1, + &cdef_find_dir_dual_c))); + +INSTANTIATE_TEST_SUITE_P( + SSE4_1, CDEFCopyRect8to16Test, + ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c, + &cdef_copy_rect8_8bit_to_16bit_sse4_1))); + +INSTANTIATE_TEST_SUITE_P( + SSE4_1, CDEFCopyRect16to16Test, + ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c, + &cdef_copy_rect8_16bit_to_16bit_sse4_1))); +#endif + +#if HAVE_AVX2 +static const CdefFilterBlockFunctions kCdefFilterFuncAvx2[] = { + { &cdef_filter_8_0_avx2, &cdef_filter_8_1_avx2, &cdef_filter_8_2_avx2, + &cdef_filter_8_3_avx2 } +}; + +static const CdefFilterBlockFunctions kCdefFilterHighbdFuncAvx2[] = { + { &cdef_filter_16_0_avx2, &cdef_filter_16_1_avx2, &cdef_filter_16_2_avx2, + &cdef_filter_16_3_avx2 } +}; + +INSTANTIATE_TEST_SUITE_P( + AVX2, CDEFBlockTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncAvx2), + ::testing::ValuesIn(kCdefFilterFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), 
::testing::Values(8))); +INSTANTIATE_TEST_SUITE_P( + AVX2, CDEFBlockHighbdTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncAvx2), + ::testing::ValuesIn(kCdefFilterHighbdFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Range(10, 13, 2))); +INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirTest, + ::testing::Values(make_tuple(&cdef_find_dir_avx2, + &cdef_find_dir_c))); +INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirDualTest, + ::testing::Values(make_tuple(&cdef_find_dir_dual_avx2, + &cdef_find_dir_dual_c))); + +INSTANTIATE_TEST_SUITE_P( + AVX2, CDEFCopyRect8to16Test, + ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c, + &cdef_copy_rect8_8bit_to_16bit_avx2))); + +INSTANTIATE_TEST_SUITE_P( + AVX2, CDEFCopyRect16to16Test, + ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c, + &cdef_copy_rect8_16bit_to_16bit_avx2))); +#endif + +#if HAVE_NEON +static const CdefFilterBlockFunctions kCdefFilterFuncNeon[] = { + { &cdef_filter_8_0_neon, &cdef_filter_8_1_neon, &cdef_filter_8_2_neon, + &cdef_filter_8_3_neon } +}; + +static const CdefFilterBlockFunctions kCdefFilterHighbdFuncNeon[] = { + { &cdef_filter_16_0_neon, &cdef_filter_16_1_neon, &cdef_filter_16_2_neon, + &cdef_filter_16_3_neon } +}; + +INSTANTIATE_TEST_SUITE_P( + NEON, CDEFBlockTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncNeon), + ::testing::ValuesIn(kCdefFilterFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(8))); +INSTANTIATE_TEST_SUITE_P( + NEON, CDEFBlockHighbdTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncNeon), + ::testing::ValuesIn(kCdefFilterHighbdFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Range(10, 13, 2))); +INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirTest, + ::testing::Values(make_tuple(&cdef_find_dir_neon, + &cdef_find_dir_c))); 
+INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirDualTest, + ::testing::Values(make_tuple(&cdef_find_dir_dual_neon, + &cdef_find_dir_dual_c))); + +INSTANTIATE_TEST_SUITE_P( + NEON, CDEFCopyRect8to16Test, + ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c, + &cdef_copy_rect8_8bit_to_16bit_neon))); + +INSTANTIATE_TEST_SUITE_P( + NEON, CDEFCopyRect16to16Test, + ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c, + &cdef_copy_rect8_16bit_to_16bit_neon))); +#endif + +// Test speed for all supported architectures +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, CDEFSpeedTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse2), + ::testing::ValuesIn(kCdefFilterFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(8))); +INSTANTIATE_TEST_SUITE_P( + SSE2, CDEFSpeedHighbdTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse2), + ::testing::ValuesIn(kCdefFilterHighbdFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(10))); +INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirSpeedTest, + ::testing::Values(make_tuple(&cdef_find_dir_sse2, + &cdef_find_dir_c))); +INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirDualSpeedTest, + ::testing::Values(make_tuple(&cdef_find_dir_dual_sse2, + &cdef_find_dir_dual_c))); +#endif + +#if HAVE_SSSE3 +INSTANTIATE_TEST_SUITE_P( + SSSE3, CDEFSpeedTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSsse3), + ::testing::ValuesIn(kCdefFilterFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(8))); +INSTANTIATE_TEST_SUITE_P( + SSSE3, CDEFSpeedHighbdTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSsse3), + ::testing::ValuesIn(kCdefFilterHighbdFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(10))); 
+INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirSpeedTest, + ::testing::Values(make_tuple(&cdef_find_dir_ssse3, + &cdef_find_dir_c))); +INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirDualSpeedTest, + ::testing::Values(make_tuple(&cdef_find_dir_dual_ssse3, + &cdef_find_dir_dual_c))); +#endif + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, CDEFSpeedTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse4_1), + ::testing::ValuesIn(kCdefFilterFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(8))); +INSTANTIATE_TEST_SUITE_P( + SSE4_1, CDEFSpeedHighbdTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse4_1), + ::testing::ValuesIn(kCdefFilterHighbdFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(10))); +INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirSpeedTest, + ::testing::Values(make_tuple(&cdef_find_dir_sse4_1, + &cdef_find_dir_c))); +INSTANTIATE_TEST_SUITE_P( + SSE4_1, CDEFFindDirDualSpeedTest, + ::testing::Values(make_tuple(&cdef_find_dir_dual_sse4_1, + &cdef_find_dir_dual_c))); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, CDEFSpeedTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncAvx2), + ::testing::ValuesIn(kCdefFilterFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(8))); +INSTANTIATE_TEST_SUITE_P( + AVX2, CDEFSpeedHighbdTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncAvx2), + ::testing::ValuesIn(kCdefFilterHighbdFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(10))); +INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirSpeedTest, + ::testing::Values(make_tuple(&cdef_find_dir_avx2, + &cdef_find_dir_c))); +INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirDualSpeedTest, + ::testing::Values(make_tuple(&cdef_find_dir_dual_avx2, + 
&cdef_find_dir_dual_c))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, CDEFSpeedTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncNeon), + ::testing::ValuesIn(kCdefFilterFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(8))); +INSTANTIATE_TEST_SUITE_P( + NEON, CDEFSpeedHighbdTest, + ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncNeon), + ::testing::ValuesIn(kCdefFilterHighbdFuncC), + ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8), + ::testing::Range(0, 16), ::testing::Values(10))); +INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirSpeedTest, + ::testing::Values(make_tuple(&cdef_find_dir_neon, + &cdef_find_dir_c))); +INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirDualSpeedTest, + ::testing::Values(make_tuple(&cdef_find_dir_dual_neon, + &cdef_find_dir_dual_c))); +#endif + +} // namespace diff --git a/third_party/aom/test/cfl_test.cc b/third_party/aom/test/cfl_test.cc new file mode 100644 index 0000000000..7fdea04c36 --- /dev/null +++ b/third_party/aom/test/cfl_test.cc @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/av1_rtcd.h" + +#include "aom_ports/aom_timer.h" +#include "test/util.h" +#include "test/acm_random.h" + +using std::make_tuple; + +using libaom_test::ACMRandom; + +#define NUM_ITERATIONS (100) +#define NUM_ITERATIONS_SPEED (INT16_MAX) + +#define ALL_CFL_TX_SIZES(function) \ + make_tuple(static_cast<TX_SIZE>(TX_4X4), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_4X8), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_4X16), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_8X4), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_8X8), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_8X16), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_8X32), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_16X4), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_16X8), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_16X16), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_16X32), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_32X8), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_32X16), &function), \ + make_tuple(static_cast<TX_SIZE>(TX_32X32), &function) + +#define ALL_CFL_TX_SIZES_SUBSAMPLE(fun420, fun422, fun444) \ + make_tuple(static_cast<TX_SIZE>(TX_4X4), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_4X8), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_4X16), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_8X4), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_8X8), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_8X16), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_8X32), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_16X4), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_16X8), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_16X16), &fun420, &fun422, 
&fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_16X32), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_32X8), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_32X16), &fun420, &fun422, &fun444), \ + make_tuple(static_cast<TX_SIZE>(TX_32X32), &fun420, &fun422, &fun444) + +namespace { + +template <typename A> +static void assert_eq(const A *a, const A *b, int width, int height) { + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i++) { + ASSERT_EQ(a[j * CFL_BUF_LINE + i], b[j * CFL_BUF_LINE + i]); + } + } +} + +static void assertFaster(int ref_elapsed_time, int elapsed_time) { + EXPECT_GT(ref_elapsed_time, elapsed_time) + << "Error: CFLSubtractSpeedTest, SIMD slower than C." << std::endl + << "C time: " << ref_elapsed_time << " us" << std::endl + << "SIMD time: " << elapsed_time << " us" << std::endl; +} + +static void printSpeed(int ref_elapsed_time, int elapsed_time, int width, + int height) { + std::cout.precision(2); + std::cout << "[ ] " << width << "x" << height + << ": C time = " << ref_elapsed_time + << " us, SIMD time = " << elapsed_time << " us" + << " (~" << ref_elapsed_time / (double)elapsed_time << "x) " + << std::endl; +} + +class CFLTest { + public: + virtual ~CFLTest() = default; + void init(TX_SIZE tx) { + tx_size = tx; + width = tx_size_wide[tx_size]; + height = tx_size_high[tx_size]; + rnd.Reset(ACMRandom::DeterministicSeed()); + } + + protected: + TX_SIZE tx_size; + int width; + int height; + ACMRandom rnd; +}; + +template <typename I> +class CFLTestWithData : public CFLTest { + public: + ~CFLTestWithData() override = default; + + protected: + I data[CFL_BUF_SQUARE]; + I data_ref[CFL_BUF_SQUARE]; + void randData(I (ACMRandom::*random)()) { + for (int j = 0; j < this->height; j++) { + for (int i = 0; i < this->width; i++) { + const I d = (this->rnd.*random)(); + data[j * CFL_BUF_LINE + i] = d; + data_ref[j * CFL_BUF_LINE + i] = d; + } + } + } +}; + +template <typename I> +class 
CFLTestWithAlignedData : public CFLTest { + public: + ~CFLTestWithAlignedData() override { + aom_free(chroma_pels_ref); + aom_free(sub_luma_pels_ref); + aom_free(chroma_pels); + aom_free(sub_luma_pels); + } + + protected: + void init() { + chroma_pels_ref = + reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE)); + ASSERT_NE(chroma_pels_ref, nullptr); + chroma_pels = + reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE)); + ASSERT_NE(chroma_pels, nullptr); + sub_luma_pels_ref = reinterpret_cast<int16_t *>( + aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE)); + ASSERT_NE(sub_luma_pels_ref, nullptr); + sub_luma_pels = reinterpret_cast<int16_t *>( + aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE)); + ASSERT_NE(sub_luma_pels, nullptr); + memset(chroma_pels_ref, 0, sizeof(I) * CFL_BUF_SQUARE); + memset(chroma_pels, 0, sizeof(I) * CFL_BUF_SQUARE); + memset(sub_luma_pels_ref, 0, sizeof(int16_t) * CFL_BUF_SQUARE); + memset(sub_luma_pels, 0, sizeof(int16_t) * CFL_BUF_SQUARE); + } + + I *chroma_pels_ref; + I *chroma_pels; + int16_t *sub_luma_pels_ref; + int16_t *sub_luma_pels; + int alpha_q3; + I dc; + void randData(int bd) { + alpha_q3 = this->rnd(33) - 16; + dc = this->rnd(1 << bd); + for (int j = 0; j < this->height; j++) { + for (int i = 0; i < this->width; i++) { + chroma_pels[j * CFL_BUF_LINE + i] = dc; + chroma_pels_ref[j * CFL_BUF_LINE + i] = dc; + sub_luma_pels_ref[j * CFL_BUF_LINE + i] = + sub_luma_pels[j * CFL_BUF_LINE + i] = this->rnd(1 << (bd + 3)); + } + } + } +}; + +typedef cfl_subtract_average_fn (*sub_avg_fn)(TX_SIZE tx_size); +typedef std::tuple<TX_SIZE, sub_avg_fn> sub_avg_param; +class CFLSubAvgTest : public ::testing::TestWithParam<sub_avg_param>, + public CFLTestWithData<int16_t> { + public: + void SetUp() override { + CFLTest::init(std::get<0>(this->GetParam())); + sub_avg = std::get<1>(this->GetParam())(tx_size); + sub_avg_ref = cfl_get_subtract_average_fn_c(tx_size); + } + ~CFLSubAvgTest() override = default; + + 
protected: + cfl_subtract_average_fn sub_avg; + cfl_subtract_average_fn sub_avg_ref; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubAvgTest); + +TEST_P(CFLSubAvgTest, SubAvgTest) { + for (int it = 0; it < NUM_ITERATIONS; it++) { + randData(&ACMRandom::Rand15); + sub_avg((uint16_t *)data, data); + sub_avg_ref((uint16_t *)data_ref, data_ref); + assert_eq<int16_t>(data, data_ref, width, height); + } +} + +TEST_P(CFLSubAvgTest, DISABLED_SubAvgSpeedTest) { + aom_usec_timer ref_timer; + aom_usec_timer timer; + randData(&ACMRandom::Rand15); + aom_usec_timer_start(&ref_timer); + for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) { + sub_avg_ref((uint16_t *)data_ref, data_ref); + } + aom_usec_timer_mark(&ref_timer); + int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer); + aom_usec_timer_start(&timer); + for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) { + sub_avg((uint16_t *)data, data); + } + aom_usec_timer_mark(&timer); + int elapsed_time = (int)aom_usec_timer_elapsed(&timer); + printSpeed(ref_elapsed_time, elapsed_time, width, height); + assertFaster(ref_elapsed_time, elapsed_time); +} + +template <typename S, typename T, typename I> +class CFLSubsampleTest : public ::testing::TestWithParam<S>, + public CFLTestWithData<I> { + public: + void SetUp() override { + CFLTest::init(std::get<0>(this->GetParam())); + fun_420 = std::get<1>(this->GetParam())(this->tx_size); + fun_422 = std::get<2>(this->GetParam())(this->tx_size); + fun_444 = std::get<3>(this->GetParam())(this->tx_size); + } + + protected: + T fun_420; + T fun_422; + T fun_444; + T fun_420_ref; + T fun_422_ref; + T fun_444_ref; + + void subsampleTest(T fun, T fun_ref, int sub_width, int sub_height, + I (ACMRandom::*random)()) { + uint16_t sub_luma_pels[CFL_BUF_SQUARE]; + uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE]; + + for (int it = 0; it < NUM_ITERATIONS; it++) { + CFLTestWithData<I>::randData(random); + fun(this->data, CFL_BUF_LINE, sub_luma_pels); + fun_ref(this->data_ref, CFL_BUF_LINE, 
sub_luma_pels_ref); + assert_eq<uint16_t>(sub_luma_pels, sub_luma_pels_ref, sub_width, + sub_height); + } + } + + void subsampleSpeedTest(T fun, T fun_ref, I (ACMRandom::*random)()) { + uint16_t sub_luma_pels[CFL_BUF_SQUARE]; + uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE]; + aom_usec_timer ref_timer; + aom_usec_timer timer; + + CFLTestWithData<I>::randData(random); + aom_usec_timer_start(&ref_timer); + for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) { + fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels); + } + aom_usec_timer_mark(&ref_timer); + int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer); + aom_usec_timer_start(&timer); + for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) { + fun(this->data, CFL_BUF_LINE, sub_luma_pels_ref); + } + aom_usec_timer_mark(&timer); + int elapsed_time = (int)aom_usec_timer_elapsed(&timer); + printSpeed(ref_elapsed_time, elapsed_time, this->width, this->height); + assertFaster(ref_elapsed_time, elapsed_time); + } +}; + +typedef cfl_subsample_lbd_fn (*get_subsample_lbd_fn)(TX_SIZE tx_size); +typedef std::tuple<TX_SIZE, get_subsample_lbd_fn, get_subsample_lbd_fn, + get_subsample_lbd_fn> + subsample_lbd_param; +class CFLSubsampleLBDTest + : public CFLSubsampleTest<subsample_lbd_param, cfl_subsample_lbd_fn, + uint8_t> { + public: + ~CFLSubsampleLBDTest() override = default; + void SetUp() override { + CFLSubsampleTest::SetUp(); + fun_420_ref = cfl_get_luma_subsampling_420_lbd_c(tx_size); + fun_422_ref = cfl_get_luma_subsampling_422_lbd_c(tx_size); + fun_444_ref = cfl_get_luma_subsampling_444_lbd_c(tx_size); + } +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubsampleLBDTest); + +TEST_P(CFLSubsampleLBDTest, SubsampleLBD420Test) { + subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1, + &ACMRandom::Rand8); +} + +TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD420SpeedTest) { + subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand8); +} + +TEST_P(CFLSubsampleLBDTest, SubsampleLBD422Test) { + 
subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand8); +} + +TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD422SpeedTest) { + subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand8); +} + +TEST_P(CFLSubsampleLBDTest, SubsampleLBD444Test) { + subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand8); +} + +TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD444SpeedTest) { + subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand8); +} + +#if CONFIG_AV1_HIGHBITDEPTH +typedef cfl_subsample_hbd_fn (*get_subsample_hbd_fn)(TX_SIZE tx_size); +typedef std::tuple<TX_SIZE, get_subsample_hbd_fn, get_subsample_hbd_fn, + get_subsample_hbd_fn> + subsample_hbd_param; +class CFLSubsampleHBDTest + : public CFLSubsampleTest<subsample_hbd_param, cfl_subsample_hbd_fn, + uint16_t> { + public: + ~CFLSubsampleHBDTest() override = default; + void SetUp() override { + CFLSubsampleTest::SetUp(); + fun_420_ref = cfl_get_luma_subsampling_420_hbd_c(tx_size); + fun_422_ref = cfl_get_luma_subsampling_422_hbd_c(tx_size); + fun_444_ref = cfl_get_luma_subsampling_444_hbd_c(tx_size); + } +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubsampleHBDTest); + +TEST_P(CFLSubsampleHBDTest, SubsampleHBD420Test) { + subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1, + &ACMRandom::Rand12); +} + +TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD420SpeedTest) { + subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand12); +} + +TEST_P(CFLSubsampleHBDTest, SubsampleHBD422Test) { + subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand12); +} + +TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD422SpeedTest) { + subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand12); +} + +TEST_P(CFLSubsampleHBDTest, SubsampleHBD444Test) { + subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand12); +} + +TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD444SpeedTest) { + subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand12); +} 
+#endif // CONFIG_AV1_HIGHBITDEPTH + +typedef cfl_predict_lbd_fn (*get_predict_fn)(TX_SIZE tx_size); +typedef std::tuple<TX_SIZE, get_predict_fn> predict_param; +class CFLPredictTest : public ::testing::TestWithParam<predict_param>, + public CFLTestWithAlignedData<uint8_t> { + public: + void SetUp() override { + CFLTest::init(std::get<0>(this->GetParam())); + CFLTestWithAlignedData::init(); + predict = std::get<1>(this->GetParam())(tx_size); + predict_ref = cfl_get_predict_lbd_fn_c(tx_size); + } + ~CFLPredictTest() override = default; + + protected: + cfl_predict_lbd_fn predict; + cfl_predict_lbd_fn predict_ref; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLPredictTest); + +TEST_P(CFLPredictTest, PredictTest) { + for (int it = 0; it < NUM_ITERATIONS; it++) { + randData(8); + predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3); + predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3); + assert_eq<uint8_t>(chroma_pels, chroma_pels_ref, width, height); + } +} +TEST_P(CFLPredictTest, DISABLED_PredictSpeedTest) { + aom_usec_timer ref_timer; + aom_usec_timer timer; + randData(8); + aom_usec_timer_start(&ref_timer); + for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) { + predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3); + } + aom_usec_timer_mark(&ref_timer); + int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer_start(&timer); + for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) { + predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3); + } + aom_usec_timer_mark(&timer); + int elapsed_time = (int)aom_usec_timer_elapsed(&timer); + printSpeed(ref_elapsed_time, elapsed_time, width, height); + assertFaster(ref_elapsed_time, elapsed_time); +} + +#if CONFIG_AV1_HIGHBITDEPTH +typedef cfl_predict_hbd_fn (*get_predict_fn_hbd)(TX_SIZE tx_size); +typedef std::tuple<TX_SIZE, get_predict_fn_hbd> predict_param_hbd; +class CFLPredictHBDTest : public ::testing::TestWithParam<predict_param_hbd>, + public 
CFLTestWithAlignedData<uint16_t> { + public: + void SetUp() override { + CFLTest::init(std::get<0>(this->GetParam())); + CFLTestWithAlignedData::init(); + predict = std::get<1>(this->GetParam())(tx_size); + predict_ref = cfl_get_predict_hbd_fn_c(tx_size); + } + ~CFLPredictHBDTest() override = default; + + protected: + cfl_predict_hbd_fn predict; + cfl_predict_hbd_fn predict_ref; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLPredictHBDTest); + +TEST_P(CFLPredictHBDTest, PredictHBDTest) { + int bd = 12; + for (int it = 0; it < NUM_ITERATIONS; it++) { + randData(bd); + predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd); + predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd); + assert_eq<uint16_t>(chroma_pels, chroma_pels_ref, width, height); + } +} +TEST_P(CFLPredictHBDTest, DISABLED_PredictHBDSpeedTest) { + aom_usec_timer ref_timer; + aom_usec_timer timer; + const int bd = 12; + randData(bd); + aom_usec_timer_start(&ref_timer); + for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) { + predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd); + } + aom_usec_timer_mark(&ref_timer); + int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer_start(&timer); + for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) { + predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd); + } + aom_usec_timer_mark(&timer); + int elapsed_time = (int)aom_usec_timer_elapsed(&timer); + printSpeed(ref_elapsed_time, elapsed_time, width, height); + assertFaster(ref_elapsed_time, elapsed_time); +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +#if HAVE_SSE2 +const sub_avg_param sub_avg_sizes_sse2[] = { ALL_CFL_TX_SIZES( + cfl_get_subtract_average_fn_sse2) }; + +INSTANTIATE_TEST_SUITE_P(SSE2, CFLSubAvgTest, + ::testing::ValuesIn(sub_avg_sizes_sse2)); + +#endif + +#if HAVE_SSSE3 +const subsample_lbd_param subsample_lbd_sizes_ssse3[] = { + ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_ssse3, + 
cfl_get_luma_subsampling_422_lbd_ssse3, + cfl_get_luma_subsampling_444_lbd_ssse3) +}; + +const predict_param predict_sizes_ssse3[] = { ALL_CFL_TX_SIZES( + cfl_get_predict_lbd_fn_ssse3) }; + +INSTANTIATE_TEST_SUITE_P(SSSE3, CFLSubsampleLBDTest, + ::testing::ValuesIn(subsample_lbd_sizes_ssse3)); + +INSTANTIATE_TEST_SUITE_P(SSSE3, CFLPredictTest, + ::testing::ValuesIn(predict_sizes_ssse3)); + +#if CONFIG_AV1_HIGHBITDEPTH +const subsample_hbd_param subsample_hbd_sizes_ssse3[] = { + ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_ssse3, + cfl_get_luma_subsampling_422_hbd_ssse3, + cfl_get_luma_subsampling_444_hbd_ssse3) +}; + +const predict_param_hbd predict_sizes_hbd_ssse3[] = { ALL_CFL_TX_SIZES( + cfl_get_predict_hbd_fn_ssse3) }; + +INSTANTIATE_TEST_SUITE_P(SSSE3, CFLSubsampleHBDTest, + ::testing::ValuesIn(subsample_hbd_sizes_ssse3)); + +INSTANTIATE_TEST_SUITE_P(SSSE3, CFLPredictHBDTest, + ::testing::ValuesIn(predict_sizes_hbd_ssse3)); +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_SSSE3 + +#if HAVE_AVX2 +const sub_avg_param sub_avg_sizes_avx2[] = { ALL_CFL_TX_SIZES( + cfl_get_subtract_average_fn_avx2) }; + +const subsample_lbd_param subsample_lbd_sizes_avx2[] = { + ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_avx2, + cfl_get_luma_subsampling_422_lbd_avx2, + cfl_get_luma_subsampling_444_lbd_avx2) +}; + +const predict_param predict_sizes_avx2[] = { ALL_CFL_TX_SIZES( + cfl_get_predict_lbd_fn_avx2) }; + +INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubAvgTest, + ::testing::ValuesIn(sub_avg_sizes_avx2)); + +INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubsampleLBDTest, + ::testing::ValuesIn(subsample_lbd_sizes_avx2)); + +INSTANTIATE_TEST_SUITE_P(AVX2, CFLPredictTest, + ::testing::ValuesIn(predict_sizes_avx2)); + +#if CONFIG_AV1_HIGHBITDEPTH +const subsample_hbd_param subsample_hbd_sizes_avx2[] = { + ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_avx2, + cfl_get_luma_subsampling_422_hbd_avx2, + cfl_get_luma_subsampling_444_hbd_avx2) +}; + +const 
predict_param_hbd predict_sizes_hbd_avx2[] = { ALL_CFL_TX_SIZES( + cfl_get_predict_hbd_fn_avx2) }; + +INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubsampleHBDTest, + ::testing::ValuesIn(subsample_hbd_sizes_avx2)); + +INSTANTIATE_TEST_SUITE_P(AVX2, CFLPredictHBDTest, + ::testing::ValuesIn(predict_sizes_hbd_avx2)); +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_AVX2 + +#if HAVE_NEON +const sub_avg_param sub_avg_sizes_neon[] = { ALL_CFL_TX_SIZES( + cfl_get_subtract_average_fn_neon) }; + +const predict_param predict_sizes_neon[] = { ALL_CFL_TX_SIZES( + cfl_get_predict_lbd_fn_neon) }; + +const subsample_lbd_param subsample_lbd_sizes_neon[] = { + ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_neon, + cfl_get_luma_subsampling_422_lbd_neon, + cfl_get_luma_subsampling_444_lbd_neon) +}; + +INSTANTIATE_TEST_SUITE_P(NEON, CFLSubAvgTest, + ::testing::ValuesIn(sub_avg_sizes_neon)); + +INSTANTIATE_TEST_SUITE_P(NEON, CFLSubsampleLBDTest, + ::testing::ValuesIn(subsample_lbd_sizes_neon)); + +INSTANTIATE_TEST_SUITE_P(NEON, CFLPredictTest, + ::testing::ValuesIn(predict_sizes_neon)); + +#if CONFIG_AV1_HIGHBITDEPTH +const subsample_hbd_param subsample_hbd_sizes_neon[] = { + ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_neon, + cfl_get_luma_subsampling_422_hbd_neon, + cfl_get_luma_subsampling_444_hbd_neon) +}; + +const predict_param_hbd predict_sizes_hbd_neon[] = { ALL_CFL_TX_SIZES( + cfl_get_predict_hbd_fn_neon) }; + +INSTANTIATE_TEST_SUITE_P(NEON, CFLSubsampleHBDTest, + ::testing::ValuesIn(subsample_hbd_sizes_neon)); + +INSTANTIATE_TEST_SUITE_P(NEON, CFLPredictHBDTest, + ::testing::ValuesIn(predict_sizes_hbd_neon)); +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_NEON + +#if HAVE_VSX +const sub_avg_param sub_avg_sizes_vsx[] = { ALL_CFL_TX_SIZES( + cfl_get_subtract_average_fn_vsx) }; + +INSTANTIATE_TEST_SUITE_P(VSX, CFLSubAvgTest, + ::testing::ValuesIn(sub_avg_sizes_vsx)); +#endif +} // namespace diff --git a/third_party/aom/test/cnn_test.cc 
b/third_party/aom/test/cnn_test.cc new file mode 100644 index 0000000000..e5114b56ce --- /dev/null +++ b/third_party/aom/test/cnn_test.cc @@ -0,0 +1,2661 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <assert.h> +#include <math.h> +#include <stdio.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/av1_rtcd.h" + +#include "aom_ports/aom_timer.h" +#include "av1/encoder/cnn.h" +#include "av1/encoder/partition_cnn_weights.h" +#include "test/acm_random.h" +#include "test/function_equivalence_test.h" +#include "test/util.h" + +#define SQR(x) ((x) * (x)) + +// Best possible pixelwise guaranteed precision given each float has at most +// 3 specified decimals. +#define PIXELWISE_FLOAT_TOL 1E-2 + +#define MSE_FLOAT_TOL 1E-6 +#define MSE_INT_TOL 0 + +// CNN convolve pixelwise error threshold for functional equivalence. 
+#define CNN_CONVOLVE_PIXELWISE_FLOAT_TOL 1E-3f + +namespace { + +class CNNTest : public ::testing::Test { + protected: + static void RunCNNTest(int image_width, int image_height, const float *input, + const float *expected, const CNN_CONFIG *cnn_config, + int in_stride, CNN_THREAD_DATA *thread_data, + double tolerance) { + int out_width, out_height, out_channels; + av1_find_cnn_output_size(image_width, image_height, cnn_config, &out_width, + &out_height, &out_channels); + + const int out_size = out_width * out_height; + const int out_stride = out_width; + + float *output_ = + (float *)aom_malloc(sizeof(*output_) * out_size * out_channels); + ASSERT_NE(output_, nullptr); + float *output[CNN_MAX_CHANNELS] = { nullptr }; + for (int channel = 0; channel < out_channels; ++channel) { + output[channel] = output_ + (channel * out_size); + } + const int num_outputs = 1; + const int output_chs[1] = { out_channels }; + const int output_strides[1] = { out_stride }; + CNN_MULTI_OUT output_struct = { num_outputs, output_chs, output_strides, + output }; + + RunMultiOutCNNTest(&input, image_width, image_height, in_stride, cnn_config, + thread_data, &output_struct, &expected, tolerance); + + aom_free(output_); + } + + static void RunMultiOutCNNTest(const float **input, int image_width, + int image_height, int in_stride, + const CNN_CONFIG *cnn_config, + CNN_THREAD_DATA *thread_data, + CNN_MULTI_OUT *output, const float **expected, + double tolerance) { + const int num_outputs = output->num_outputs; + const int *output_chs = output->output_channels; + + int *out_widths = (int *)aom_calloc(num_outputs, sizeof(*out_widths)); + int *out_heights = (int *)aom_calloc(num_outputs, sizeof(*out_heights)); + int *not_used = (int *)aom_calloc(num_outputs, sizeof(*not_used)); + ASSERT_NE(out_widths, nullptr); + ASSERT_NE(out_heights, nullptr); + ASSERT_NE(not_used, nullptr); + + av1_find_cnn_output_size(image_width, image_height, cnn_config, out_widths, + out_heights, not_used); + 
ASSERT_TRUE(av1_cnn_predict(input, image_width, image_height, in_stride, + cnn_config, thread_data, output)); + + int channel_offset = 0; + for (int output_idx = 0; output_idx < num_outputs; output_idx++) { + const float *expected_out = expected[output_idx]; + const int curr_output_chs = output_chs[output_idx]; + const int out_size = out_widths[output_idx] * out_heights[output_idx]; + + double mse = 0; + int expected_ite = 0; + for (int channel = 0; channel < curr_output_chs; ++channel) { + const float *buf_out = output->output_buffer[channel_offset]; + + for (int i = 0; i < out_size; ++i) { + EXPECT_NEAR(expected_out[expected_ite], buf_out[i], + PIXELWISE_FLOAT_TOL) + << " output " << output_idx << " channel " << channel << " pixel " + << expected_ite % out_size << ": " << expected_out[expected_ite] + << "/" << buf_out[i] << std::endl; + mse += SQR(expected_out[expected_ite] - buf_out[i]); + expected_ite++; + } + + channel_offset++; + } + mse /= (out_size * curr_output_chs); + EXPECT_LE(mse, tolerance) << " output " << output_idx << std::endl; + } + + aom_free(out_widths); + aom_free(out_heights); + aom_free(not_used); + } + + static void AssignLayerWeightsBiases(CNN_CONFIG *cnn_config, float *weights, + float *bias) { + size_t weight_offset = 0; + size_t bias_offset = 0; + for (int layer = 0; layer < cnn_config->num_layers; ++layer) { + CNN_LAYER_CONFIG *layer_config = &cnn_config->layer_config[layer]; + layer_config->weights = weights + weight_offset; + layer_config->bias = bias + bias_offset; + weight_offset += layer_config->filter_width * + layer_config->filter_height * layer_config->in_channels * + layer_config->out_channels; + bias_offset += layer_config->out_channels; + + ASSERT_NE(layer_config->weights, nullptr); + ASSERT_NE(layer_config->bias, nullptr); + } + } +}; + +} // namespace + +TEST_F(CNNTest, TestMultilayerConvolution) { + int image_height = 16; + int image_width = 16; + int filter_height = 5; + int filter_width = 4; + + float input[] = { + -3, 
1, -3, 2, -2, -2, 2, -2, 1, -2, -3, 1, 2, 2, 2, -2, 0, 1, -1, + -3, -1, -1, 1, 0, -3, 1, 0, -1, 1, 0, 0, -3, -3, -3, 0, 2, 1, -1, + 2, 0, 1, -3, -1, 2, 2, 1, -2, 0, -1, 0, -2, -2, -1, 1, 0, 0, 0, + -2, -2, -2, 1, 1, -2, 1, 1, -2, -2, 1, -2, -1, -2, -3, 2, -3, -1, 1, + 0, -2, -2, -2, 1, -2, -2, -1, -1, 2, 2, 2, -1, 1, -3, -3, 0, 2, 0, + 2, 1, -3, -3, 1, 2, 2, 1, -2, -3, 0, -3, 0, -3, -2, 0, 1, 1, 0, + -3, 2, -1, 2, 1, 0, 1, -2, 1, -1, -1, 2, 0, -2, -3, 1, 1, -2, -1, + -3, -3, -1, 0, -3, -2, 0, 0, 1, 0, -3, -2, -1, 1, 0, 2, 1, 0, -3, + -2, -3, -3, -1, 0, -2, 2, -1, -3, 0, -1, -1, 2, 0, -3, -2, -1, 0, 0, + 1, -2, 1, 2, 1, 2, 2, -3, 2, -1, 0, 0, -1, 0, 2, 2, -1, 2, -2, + 1, 1, -3, -3, 1, -1, -1, -2, 2, -2, -2, 2, -1, -3, 2, -3, 1, -1, -1, + -3, 1, -1, 1, 0, -3, -3, 1, -3, -3, 0, 2, 2, -2, -1, 2, 0, 2, 1, + -1, -3, 0, 0, -1, -1, 1, 0, 2, 0, -3, 2, 1, 0, 1, -3, 2, -3, -3, + -1, -3, -3, 2, 0, 2, -2, 1, -1, + }; + + float weights[] = { + -2, 2, -2, 2, -1, -3, 2, 2, 0, 0, -3, -1, -2, -3, 1, -1, 0, 0, 0, + 2, -2, 2, -2, -3, 1, 1, 1, -3, -1, 0, 1, 2, -2, 0, -1, -3, -1, -2, + 2, -3, -3, 1, -2, -3, 0, 2, 1, -3, -3, -1, -3, -2, -1, -3, -1, -3, -2, + -1, -3, -1, -2, -2, -3, 2, 0, -3, 0, -3, -3, 1, -3, -1, 0, -1, 1, 1, + -1, 1, -2, 0, 2, 0, -3, 1, -1, -1, 2, 0, 1, -3, -3, 1, 2, -3, -3, + 1, -3, 2, 0, -3, 1, 2, 2, -2, -1, -2, 1, 1, 0, -2, -2, 1, 2, -1, + -3, 1, -2, 2, -3, -2, -3, 2, 1, 0, -2, 0, 1, -3, 2, -2, -2, 0, 2, + -3, 2, 0, 0, 1, -2, 1, 1, -2, -1, -2, 1, -2, 0, -2, -2, 0, -1, -1, + -3, -3, -3, 1, -3, -2, 2, -1, 2, 0, 2, -2, 2, -2, 1, -3, -3, -1, 0, + 2, 2, 1, -1, -3, -1, -3, 2, 1, -2, 0, -3, -1, -3, -1, 2, 1, 0, 2, + -1, 1, 0, 1, 2, -1, -2, 2, 1, -3, -1, -3, 0, 1, -2, 0, -2, -3, 0, + -2, 2, 2, 0, 0, 2, -3, 2, -3, -2, 1, 2, -3, -3, -1, -3, 0, -3, -3, + -2, -2, -2, 0, 0, 1, 0, 0, -1, 0, 0, -3, 0, -3, -1, -2, 1, -2, -1, + 2, -2, 0, 0, 1, 0, -2, -1, 0, -3, 1, 0, -1, -3, 1, -1, 1, -1, -3, + 1, 0, 1, 1, -1, 2, 2, 0, 0, 1, -3, 2, -2, -2, -3, -2, -1, -2, 2, + 0, 2, -2, -3, -1, -3, 
2, 2, -1, 2, 2, -1, 0, -3, 1, + }; + + float bias[] = { + 1, -1, 0, 1, 1, 1, -2, + }; + + float expected_same[] = { + -1125, 2926, 6406, 631, -1244, 97, -1454, 2526, 1065, 3292, 3464, + 2553, -330, 532, 1038, 1182, -402, 3758, 3392, 9854, 4365, 1408, + 4736, 3134, 3838, 2409, 3221, 4350, 6750, 4045, 815, 1188, 2959, + 9802, 9590, 4572, 5740, 4253, 1701, 7974, 7012, 6854, 7093, 3907, + 4539, 3886, 4267, 3505, 465, 7824, 9219, 10026, 7968, 957, 2295, + 5594, 10811, 9641, 5950, 10043, 8783, 3132, 1421, 1110, 4108, 13929, + 10660, -84, -61, 3932, -180, 6811, 13393, 15147, 15640, 9337, 6961, + 3808, 1604, 1398, 1047, 6739, 10144, 6517, 4698, 2678, 7389, 2595, + 5248, 12075, 11272, 13951, 8820, 1090, 2199, 2206, 2788, 12116, 6683, + 2612, -291, 3183, 9414, 12316, 14524, 12333, 13208, 7832, 4664, 4657, + 3534, 1298, -666, 4250, 7707, 9103, 5760, 688, 9571, 15782, 14203, + 14878, 17339, 14684, 8690, 5671, 875, 1429, 1531, 6173, 2984, 5558, + 2996, 7928, 6733, 16117, 15262, 12757, 7980, 3923, 4795, 5973, 2051, + 455, -1922, 1816, 5906, 3321, 10908, 10910, 7377, 12204, 12809, 11195, + 7451, 6666, 74, -1645, -35, -391, 3813, 7324, 892, 1656, 6095, + 12193, 14648, 12156, 14663, 10251, 10325, 7821, 3925, 323, 697, 442, + 1324, 4669, 7002, 5485, 5171, 5086, 10582, 11053, 9709, 11353, 8543, + 5256, 2873, 235, -628, 1496, 1878, -867, 3420, 6865, 5937, 10182, + 13277, 10069, 10789, 5998, 624, -2082, 4417, 1258, -1080, -819, -1430, + 1033, 5220, 6335, 8471, 8980, 11908, 14430, 12584, 8404, 1576, -803, + 985, 1481, 1367, -193, 873, 3684, 2288, 6676, 9477, 11155, 9602, + 9707, 10507, 4739, 3174, -575, -178, 3002, 1710, 423, -477, 554, + 3088, 2029, 5113, 5000, 3771, 6090, 5365, 1185, 2855, 399, -312, + -1577, 176, 955, + }; + + float expected_replicate[] = { + 13768, 13528, 12999, 6906, 4618, 4043, 2611, 9955, 6685, 4776, 2753, + 1036, 3063, 4544, 5183, 7349, 12451, 12501, 9131, 12753, 8908, 4058, + 6299, 7542, 7115, 3307, 3360, 3543, 9754, 7808, 5991, 9019, 14320, + 14919, 12492, 
6871, 7373, 3336, 2085, 10604, 9377, 6882, 5009, 3103, + 6220, 6278, 7588, 10196, 11045, 11563, 11842, 11911, 8279, 2030, 1858, + 6368, 12123, 9909, 6347, 10345, 9365, 4038, 1673, 3051, 16492, 16649, + 12276, 408, -301, 4122, -654, 7864, 14038, 15279, 15315, 9744, 8243, + 5298, 746, 380, 9824, 9124, 10895, 6640, 4712, 2669, 6980, 2759, + 5385, 12345, 11336, 13129, 8600, 2370, 3682, 5219, 12407, 13123, 6784, + 2612, -291, 3183, 9414, 12316, 14524, 12333, 13397, 7543, 3916, 4153, + 4477, 4314, 7983, 8418, 9163, 9103, 5760, 688, 9571, 15782, 14203, + 14878, 17718, 14570, 7940, 6642, 5094, 7133, 9964, 10219, 3224, 5558, + 2996, 7928, 6733, 16117, 15262, 12757, 7958, 4401, 5187, 5476, 5529, + 6055, 2206, 3909, 6015, 3321, 10908, 10910, 7377, 12204, 12809, 11195, + 6967, 6840, 481, -1600, 274, 1, 10373, 8514, 1123, 2117, 6758, + 12736, 16223, 13585, 15988, 11771, 10600, 7918, 4156, 2840, 3111, 3287, + 6359, 7652, 8813, 6530, 6967, 7789, 13671, 13990, 13247, 13241, 9836, + 5251, 3024, 2313, 1834, 4187, 2637, -1312, 2139, 7378, 7665, 11933, + 15591, 15314, 15678, 9531, 2820, -1516, 3400, 1314, 22, 363, -2896, + -898, 5906, 7308, 10650, 12975, 16978, 20370, 18817, 12381, 4118, -861, + -137, 236, 1802, 1632, -350, 2334, 3400, 8680, 14064, 18216, 18675, + 21765, 22871, 11491, 4937, -1555, -11, 1669, 2392, 3265, -5254, -217, + 5001, 8063, 13444, 18884, 19706, 22794, 21064, 9545, 6689, -7, 289, + -2021, 504, 2347, + }; + + float expected_valid[] = { + 2612, -291, 3183, 9414, 12316, 14524, 12333, 9103, 5760, 688, + 9571, 15782, 14203, 14878, 5558, 2996, 7928, 6733, 16117, 15262, + 12757, 3321, 10908, 10910, 7377, 12204, 12809, 11195, + }; + + CNN_CONFIG cnn_config = { 3, + 0, + 0, + 0, + 0, + { + { + 1, + filter_width, + filter_height, + 3, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + -1, + }, + { + 3, + filter_width, + filter_height, + 3, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + 
NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + -1, + }, + { + 3, + filter_width, + filter_height, + 1, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + }, + } }; + + // Weights and biases need to be specified separately because + // of the offset. + AssignLayerWeightsBiases(&cnn_config, weights, bias); + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected_same, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + for (int i = 0; i < cnn_config.num_layers; ++i) { + cnn_config.layer_config[i].pad = PADDING_SAME_REPLICATE; + } + + RunCNNTest(image_width, image_height, input, expected_replicate, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + for (int i = 0; i < cnn_config.num_layers; ++i) { + cnn_config.layer_config[i].pad = PADDING_VALID; + } + + RunCNNTest(image_width, image_height, input, expected_valid, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); +} + +TEST_F(CNNTest, TestRELUSingleLayer) { + int image_width = 8; + int image_height = 8; + int filter_height = 5; + int filter_width = 4; + float input[] = { + 0, -2, -3, 1, -1, 2, -2, 1, -3, -1, 0, 1, -2, -3, -2, -2, + 1, -3, 2, -3, -1, -1, 2, 0, -2, -3, 0, -2, -3, 1, -1, -1, + 2, -2, 0, -2, -3, -3, 1, 1, -1, 1, 0, 1, -3, 0, 2, 2, + 0, -3, 1, -3, 2, -2, 1, -1, -1, -2, -3, -2, -1, -3, -2, -1, + }; + float expected_same[] = { + 9, 0, 1, 1, 0, 3, 0, 19, 0, 12, 10, 0, 0, 0, 5, 0, + 0, 18, 21, 7, 19, 4, 3, 0, 0, 9, 16, 0, 11, 16, 0, 11, + 12, 2, 0, 11, 0, 16, 6, 0, 8, 22, 13, 10, 12, 0, 0, 0, + 0, 1, 2, 12, 29, 6, 10, 0, 13, 0, 0, 5, 8, 10, 0, 0, + }; + float expected_replicate[] = { + 18, 17, 12, 2, 0, 0, 5, 11, 0, 17, 22, 6, 0, 0, 17, 0, + 0, 18, 21, 7, 19, 4, 3, 5, 3, 9, 16, 0, 11, 16, 0, 3, + 3, 2, 0, 11, 0, 16, 6, 0, 17, 22, 13, 10, 12, 0, 0, 0, + 0, 4, 1, 10, 30, 7, 10, 0, 23, 8, 0, 13, 15, 19, 8, 10, + }; + float expected_valid[] = { + 18, 
21, 7, 19, 4, 9, 16, 0, 11, 16, 2, 0, 11, 0, 16, 22, 13, 10, 12, 0, + }; + float weights[] = { + -2, -3, 1, 2, 2, -2, -3, 0, -3, 2, 2, -3, -3, -2, 0, 1, 2, 0, -1, -1, + }; + float bias[] = { -3 }; + + CNN_CONFIG cnn_config = { 1, + 0, + 0, + 0, + 0, + { { + 1, + filter_width, + filter_height, + 1, + 1, + 1, + 0, + weights, + bias, + PADDING_SAME_ZERO, + RELU, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + } } }; + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected_same, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + cnn_config.layer_config[0].pad = PADDING_SAME_REPLICATE; + + RunCNNTest(image_width, image_height, input, expected_replicate, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + cnn_config.layer_config[0].pad = PADDING_VALID; + + RunCNNTest(image_width, image_height, input, expected_valid, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); +} + +TEST_F(CNNTest, TestVaryingStridesVaryingDimImages) { + float weights[] = { + 1, -5, -3, -4, -1, 1, 2, -3, 2, 2, -1, 1, -5, 1, 1, + -3, -5, 3, 1, 4, -2, -5, -2, -3, -5, 0, -1, -5, 2, -2, + -2, 1, -2, -4, 1, 3, -2, 2, 0, -3, 2, -3, -2, -3, + }; + float bias[] = { 2 }; + + CNN_CONFIG cnn_config = { 1, + 0, + 0, + 0, + 0, + { + { + 1, + 4, + 11, + 1, + 7, + 6, + 0, + weights, + bias, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + }, + } }; + + int image_height = 24; + int image_width = 17; + float input[] = { + -1, -3, 4, 4, -5, 4, 3, -5, -1, -3, 4, -4, 2, -3, 3, -5, 2, -1, -5, + 1, -1, 3, 1, -3, -3, 4, 0, 2, -3, -5, -5, -4, 0, -5, -2, -3, -1, -2, + 2, -5, 4, 4, 0, -4, -3, 1, -3, -5, -4, -4, 1, -2, -3, 3, -3, -3, -1, + -5, -5, -2, 3, 1, -1, -5, -5, 1, -4, -2, -1, -2, -4, -4, 2, -2, 2, 1, + -2, -4, -1, 1, -2, -5, 3, -2, -1, -1, -5, -3, 1, -2, -2, -3, -1, -2, -4, + -2, 1, -4, -1, 4, 3, -4, 0, 4, 2, 2, 4, -3, -5, 2, 2, 1, -1, -4, + -2, 1, 3, 2, 0, 4, -1, -3, 2, 1, -4, 2, 2, -4, -2, 
0, -2, -1, 4, + 4, 2, 3, -4, 2, -4, -5, 4, -1, -3, -1, 0, -4, 1, 3, -1, -3, -5, 3, + -2, -4, 1, 2, -2, -3, -3, -5, 1, -3, -1, 0, -1, 3, -4, -1, -5, -5, 1, + 0, 0, -2, -2, 2, -2, 0, 0, 2, 0, -3, 0, -1, -4, -4, -1, 3, -4, -4, + -1, 0, -5, -3, -2, 4, -3, -4, -4, 0, -5, 1, -2, -3, -3, -4, 4, 3, 4, + 3, 3, -1, 3, 1, -3, -2, 3, 3, 0, 2, -4, -3, 2, 2, 0, -2, 4, -2, + 2, -2, -1, -4, -2, 2, -4, 3, -1, 4, 1, 1, 4, -1, -4, -4, 1, 1, -2, + 4, -1, 3, 2, -3, 4, 3, 1, 4, 0, -4, 2, 0, 2, 4, -2, -2, 4, 2, + -1, -2, 1, -3, 2, 3, -5, -3, 4, 4, 2, -5, -4, -5, -2, -4, 2, 0, 2, + -5, 4, -4, -2, -5, 2, 1, 0, 4, 1, -2, -3, -4, -3, -4, 3, 3, 2, 0, + -3, 1, -5, 4, 0, 4, -1, 3, -5, -5, -2, -1, -1, 4, 3, 3, 4, 3, -4, + 4, -3, -3, -1, -4, -1, -4, -1, -2, 4, -2, -4, 4, 4, -3, -4, -1, 1, 2, + -1, -2, -2, 3, 2, 2, -3, 0, -1, 0, 3, 2, -5, 0, -4, 0, 0, 2, -4, + -1, -1, 0, -2, 0, 1, 0, 0, 4, -5, -1, -5, 2, -1, 0, 2, -1, 1, 3, + -3, -5, -2, -3, 4, -2, -2, -1, -3, -4, -1, -2, -4, 1, 4, -3, -2, -1, 3, + -3, -2, 3, 2, 1, -4, -3, -5, 1, + }; + float expected_1[] = { + 41, -26, 5, 76, 13, 83, -21, 53, -54, -14, 21, 121, + }; + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected_1, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + cnn_config.layer_config[0].skip_width = 6; + cnn_config.layer_config[0].skip_height = 7; + + float expected_2[] = { + 21, -50, 41, 20, 72, 127, -21, 103, 62, -37, 83, -3, + }; + RunCNNTest(image_width, image_height, input, expected_2, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + cnn_config.layer_config[0].skip_width = 3; + cnn_config.layer_config[0].skip_height = 10; + + float expected_3[] = { + -26, -21, -35, 69, 49, 4, -51, -43, -56, + -41, 15, -44, 40, -62, 63, 38, 27, 47, + }; + RunCNNTest(image_width, image_height, input, expected_3, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + cnn_config.layer_config[0].skip_width = 10; + cnn_config.layer_config[0].skip_height = 3; + + float 
expected_4[] = { + 21, 49, 28, 87, 50, 40, 102, 81, 58, 85, 51, 66, 36, 19, -37, -45, + }; + + RunCNNTest(image_width, image_height, input, expected_4, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); +} + +TEST_F(CNNTest, TestMaxPool) { + int image_width = 8; + int image_height = 8; + int stride = 3; + float input[] = { + 1, -4, -4, 8, 0, 7, -5, -2, 8, 2, 2, 8, 5, -1, -1, 9, + -3, 0, -2, 0, 6, 3, -4, 8, 7, 8, 7, -1, 4, -1, 0, 2, + -5, -2, 8, 5, 5, 4, 2, 7, 4, 6, 2, 8, 8, -4, -3, -4, + -3, -1, 2, 3, 3, 6, -5, 8, 9, 5, 0, -2, -1, 6, 5, 7, + }; + + float expected[] = { + 49, 58, 70, 68, 68, 70, 48, 57, 88, + }; + + float weights[] = { + 3, 1, 3, 4, -1, 5, -2, 1, -4, + }; + + float bias[] = { + -3, + }; + + CNN_CONFIG cnn_config = { 1, + 0, + 0, + 0, + 0, + { { + 1, + 3, + 3, + 1, + stride, + stride, + 1, + weights, + bias, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + } } }; + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); +} + +TEST_F(CNNTest, TestDeconvolveNonActivationSingleLayerSingleKernel) { + int image_width = 4; + int image_height = 7; + float input[] = { + 9, 6, 181, 9, 218, 30, 80, 108, 68, 216, 70, 128, 179, 228, + 33, 212, 34, 14, 48, 27, 230, 23, 202, 113, 80, 56, 122, 112, + }; + + float expected_1_same[] = { + 15, -30, 36, -525, 377, -193, 558, 531, 6, -24, -15, 124, + 166, -561, -356, -754, -3, -3, -3, -3, -3, -3, -3, -3, + 433, -311, 711, 381, 247, -317, 453, 129, 215, -627, -409, -885, + 17, -255, -55, -647, -3, -3, -3, -3, -3, -3, -3, -3, + 133, -719, 633, -225, 785, 191, 463, 79, 65, 9, 77, -853, + -365, -949, -15, -667, -3, -3, -3, -3, -3, -3, -3, -3, + 355, -866, 990, 207, 747, 12, 520, -116, 176, -312, -133, -1370, + -426, -802, 143, -771, -3, -3, -3, -3, -3, -3, -3, -3, + 65, -79, 127, -59, 135, -90, 195, 114, 31, -91, -57, -133, + 17, -176, -72, -276, -3, -3, -3, -3, -3, -3, -3, 
-3, + 457, -302, 733, 58, 470, -475, 829, 490, 227, -670, -440, -790, + 153, -588, -294, -1150, -3, -3, -3, -3, -3, -3, -3, -3, + 157, -251, 349, -185, 409, -293, 587, 251, 77, -187, -107, -369, + 7, -481, -135, -827, -3, -3, -3, -3, -3, -3, -3, -3, + }; + float expected_1_valid[] = { + -30, 15, -30, 36, -525, 377, -193, 558, 531, 24, 24, 6, + 6, -24, -15, 124, 166, -561, -356, -754, -21, -39, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -657, 433, -311, + 711, 381, 247, -317, 453, 129, 321, 321, 215, 215, -627, -409, + -885, 17, -255, -55, -647, -219, -435, -3, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -207, 133, -719, 633, -225, 785, + 191, 463, 79, 381, 381, 65, 65, 9, 77, -853, -365, -949, + -15, -667, -259, -515, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -540, 355, -866, 990, 207, 747, 12, 520, -116, + 633, 633, 176, 176, -312, -133, -1370, -426, -802, 143, -771, -427, + -851, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -105, 65, -79, 127, -59, 135, -90, 195, 114, 78, 78, 31, + 31, -91, -57, -133, 17, -176, -72, -276, -57, -111, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -693, 457, -302, + 733, 58, 470, -475, 829, 490, 336, 336, 227, 227, -670, -440, + -790, 153, -588, -294, -1150, -229, -455, -3, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -243, 157, -251, 349, -185, 409, + -293, 587, 251, 333, 333, 77, 77, -187, -107, -369, 7, -481, + -135, -827, -227, -451, + }; + float weights_1[] = { -3, 2, -1, 3, 3, 1, 1, -3, -2, -4 }; + float bias_1[] = { -3 }; + + CNN_CONFIG cnn_config = { 1, + 0, + 0, + 0, + 0, + { { + 1, + 5, + 2, + 1, + 2, + 3, + 0, + weights_1, + bias_1, + PADDING_SAME_ZERO, + NONE, + 1, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + } } }; + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected_1_same, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + // Change padding to valid + cnn_config.layer_config[0].pad = PADDING_VALID; + + RunCNNTest(image_width, image_height, 
input, expected_1_valid, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + float expected_12_same[] = { + 15, -12, 6, 36, -9, -528, 377, -184, 513, 558, -12, 24, + 6, -30, -15, -33, -21, 166, 154, -546, -356, -718, -30, -21, + 433, -221, 561, 711, -33, -153, 247, -83, -87, 453, -111, 321, + 215, -657, -409, -845, -93, 17, -43, -243, -55, -215, -327, -219, + 133, -71, -447, 633, -219, 435, 785, -73, -177, 463, -131, 381, + 65, -207, 77, -59, -651, -365, -797, -213, -15, -155, -387, -259, + 355, -182, -150, 990, -231, 582, 747, -36, -540, 520, -215, 633, + 176, -540, -133, -491, -687, -426, -882, -102, 143, 77, -639, -427, + 65, -37, 57, 127, -17, -105, 135, -51, 60, 195, -30, 78, + 31, -105, -57, -125, -45, 17, -11, -147, -72, -168, -84, -57, + 457, -233, 618, 733, -26, -540, 470, -205, 264, 829, -116, 336, + 227, -693, -440, -900, -72, 153, 107, -609, -294, -698, -342, -229, + 157, -83, 69, 349, -59, -201, 409, -125, 27, 587, -115, 333, + 77, -243, -107, -267, -171, 7, -105, -369, -135, -379, -339, -227, + }; + float expected_12_valid[] = { + -30, 15, -12, 6, 36, -9, -528, 377, -184, 513, 558, -12, + 24, 24, 6, 6, -30, -15, -33, -21, 166, 154, -546, -356, + -718, -30, -21, -39, -657, 433, -221, 561, 711, -33, -153, 247, + -83, -87, 453, -111, 321, 321, 215, 215, -657, -409, -845, -93, + 17, -43, -243, -55, -215, -327, -219, -435, -207, 133, -71, -447, + 633, -219, 435, 785, -73, -177, 463, -131, 381, 381, 65, 65, + -207, 77, -59, -651, -365, -797, -213, -15, -155, -387, -259, -515, + -540, 355, -182, -150, 990, -231, 582, 747, -36, -540, 520, -215, + 633, 633, 176, 176, -540, -133, -491, -687, -426, -882, -102, 143, + 77, -639, -427, -851, -105, 65, -37, 57, 127, -17, -105, 135, + -51, 60, 195, -30, 78, 78, 31, 31, -105, -57, -125, -45, + 17, -11, -147, -72, -168, -84, -57, -111, -693, 457, -233, 618, + 733, -26, -540, 470, -205, 264, 829, -116, 336, 336, 227, 227, + -693, -440, -900, -72, 153, 107, -609, -294, -698, -342, -229, -455, + -243, 157, -83, 
69, 349, -59, -201, 409, -125, 27, 587, -115, + 333, 333, 77, 77, -243, -107, -267, -171, 7, -105, -369, -135, + -379, -339, -227, -451, + }; + + // Change skip_width, skip_height to {3, 2} + cnn_config.layer_config[0].skip_width = 3; + cnn_config.layer_config[0].skip_height = 2; + // Set padding to same + cnn_config.layer_config[0].pad = PADDING_SAME_ZERO; + + RunCNNTest(image_width, image_height, input, expected_12_same, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + // Change padding to valid + cnn_config.layer_config[0].pad = PADDING_VALID; + RunCNNTest(image_width, image_height, input, expected_12_valid, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + cnn_config.layer_config[0].filter_width = 4; + cnn_config.layer_config[0].filter_height = 3; + float weights_2[] = { -1, -3, -1, -3, 0, 2, -2, 4, 3, 0, 1, 4 }; + float bias_2[] = { -4 }; + cnn_config.layer_config[0].weights = weights_2; + cnn_config.layer_config[0].bias = bias_2; + + cnn_config.layer_config[0].skip_width = 5; + cnn_config.layer_config[0].skip_height = 2; + float expected_2_same[] = { + -13, -31, -13, -31, -4, -10, -22, -10, -22, -4, -185, -547, + -185, -547, -4, -13, -31, -13, -31, -4, -4, 14, -22, 32, + -4, -4, 8, -16, 20, -4, -4, 358, -366, 720, -4, -4, + 14, -22, 32, -4, -195, -658, -213, -622, -4, -16, -94, -28, + -70, -4, 459, -244, 97, 480, -4, -85, -328, -103, -292, -4, + -4, 432, -440, 868, -4, -4, 56, -64, 116, -4, -4, 156, + -164, 316, -4, -4, 212, -220, 428, -4, 582, -208, 146, 664, + -4, -130, -652, -190, -532, -4, 166, -214, 6, 106, -4, 192, + -388, -24, 44, -4, -4, 132, -140, 268, -4, -4, 428, -436, + 860, -4, -4, 136, -144, 276, -4, -4, 252, -260, 508, -4, + 21, -541, -115, -269, -4, 416, -688, -16, 176, -4, 173, -103, + 33, 177, -4, 168, -640, -88, -128, -4, -4, 354, -362, 712, + -4, -4, 452, -460, 908, -4, -4, 62, -70, 128, -4, -4, + 420, -428, 844, -4, 499, -106, 141, 610, -4, 666, -46, 210, + 866, -4, 47, -148, -19, -16, -4, 605, -85, 181, 763, -4, + 
-4, 64, -72, 132, -4, -4, 24, -32, 52, -4, -4, 92, + -100, 188, -4, -4, 50, -58, 104, -4, -132, -694, -200, -558, + -4, 15, -73, -13, -17, -4, -62, -610, -158, -418, -4, -36, + -343, -90, -235, -4, -4, 456, -464, 916, -4, -4, 42, -50, + 88, -4, -4, 400, -408, 804, -4, -4, 222, -230, 448, -4, + 606, -244, 146, 676, -4, 9, -172, -37, -80, -4, 480, -370, + 76, 438, -4, 223, -340, -3, 112, -4, -4, 156, -164, 316, + -4, -4, 108, -116, 220, -4, -4, 240, -248, 484, -4, -4, + 220, -228, 444, -4, + }; + float expected_2_valid[] = { + -13, -31, -13, -31, -4, -10, -22, -10, -22, -4, -185, -547, + -185, -547, -4, -13, -31, -13, -31, -4, 14, -22, 32, -4, + -4, 8, -16, 20, -4, -4, 358, -366, 720, -4, -4, 14, + -22, 32, -195, -658, -213, -622, -4, -16, -94, -28, -70, -4, + 459, -244, 97, 480, -4, -85, -328, -103, -292, -4, 432, -440, + 868, -4, -4, 56, -64, 116, -4, -4, 156, -164, 316, -4, + -4, 212, -220, 428, 582, -208, 146, 664, -4, -130, -652, -190, + -532, -4, 166, -214, 6, 106, -4, 192, -388, -24, 44, -4, + 132, -140, 268, -4, -4, 428, -436, 860, -4, -4, 136, -144, + 276, -4, -4, 252, -260, 508, 21, -541, -115, -269, -4, 416, + -688, -16, 176, -4, 173, -103, 33, 177, -4, 168, -640, -88, + -128, -4, 354, -362, 712, -4, -4, 452, -460, 908, -4, -4, + 62, -70, 128, -4, -4, 420, -428, 844, 499, -106, 141, 610, + -4, 666, -46, 210, 866, -4, 47, -148, -19, -16, -4, 605, + -85, 181, 763, -4, 64, -72, 132, -4, -4, 24, -32, 52, + -4, -4, 92, -100, 188, -4, -4, 50, -58, 104, -132, -694, + -200, -558, -4, 15, -73, -13, -17, -4, -62, -610, -158, -418, + -4, -36, -343, -90, -235, -4, 456, -464, 916, -4, -4, 42, + -50, 88, -4, -4, 400, -408, 804, -4, -4, 222, -230, 448, + 606, -244, 146, 676, -4, 9, -172, -37, -80, -4, 480, -370, + 76, 438, -4, 223, -340, -3, 112, -4, 156, -164, 316, -4, + -4, 108, -116, 220, -4, -4, 240, -248, 484, -4, -4, 220, + -228, 444, 236, -4, 76, 316, -4, 164, -4, 52, 220, -4, + 362, -4, 118, 484, -4, 332, -4, 108, 444, + }; + // Set padding to same + 
cnn_config.layer_config[0].pad = PADDING_SAME_ZERO; + + RunCNNTest(image_width, image_height, input, expected_2_same, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + cnn_config.layer_config[0].pad = PADDING_VALID; + + RunCNNTest(image_width, image_height, input, expected_2_valid, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + cnn_config.layer_config[0].skip_width = 2; + cnn_config.layer_config[0].skip_height = 5; + float expected_21_same[] = { + -31, -19, -49, -191, -565, -194, -574, -13, 14, -22, 44, -16, + 382, -366, 738, -22, -4, 23, 32, 545, 20, 204, 720, 5, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -658, -252, -748, -114, -334, -192, -568, -112, + 432, -440, 928, -64, 276, -164, 532, -220, -4, 304, 868, 266, + 116, 400, 316, 104, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -208, -288, -856, -290, + -862, -202, -598, -132, 132, -140, 700, -436, 1000, -144, 532, -260, + -4, 712, 268, 422, 860, 450, 276, 124, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -541, -411, -1225, -265, -787, -249, -739, -216, 354, -362, 1168, -460, + 974, -70, 552, -428, -4, 859, 712, 323, 908, 665, 128, 208, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -106, -52, -148, -66, -190, -79, -229, -31, + 64, -72, 160, -32, 148, -100, 242, -58, -4, 72, 132, 154, + 52, 125, 188, 23, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -694, -257, -763, -229, + -679, -319, -949, -117, 456, -464, 962, -50, 492, -408, 1030, -230, + -4, 295, 916, 625, 88, 537, 804, 109, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -244, -140, -412, -182, -538, -238, -706, -116, 156, -164, 428, -116, + 464, -248, 708, -228, -4, 244, 316, 418, 220, 454, 484, 108, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, + }; + float expected_21_valid[] = { + -13, -31, -19, -49, -191, -565, -194, -574, -13, -31, -4, 14, + -22, 44, -16, 382, -366, 738, -22, 
32, 23, -4, 23, 32, + 545, 20, 204, 720, 5, 32, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -222, -658, -252, -748, -114, -334, -192, -568, -112, -328, + -4, 432, -440, 928, -64, 276, -164, 532, -220, 428, 650, -4, + 304, 868, 266, 116, 400, 316, 104, 428, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -72, -208, -288, -856, -290, -862, -202, -598, + -132, -388, -4, 132, -140, 700, -436, 1000, -144, 532, -260, 508, + 200, -4, 712, 268, 422, 860, 450, 276, 124, 508, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -183, -541, -411, -1225, -265, -787, + -249, -739, -216, -640, -4, 354, -362, 1168, -460, 974, -70, 552, + -428, 844, 533, -4, 859, 712, 323, 908, 665, 128, 208, 844, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -38, -106, -52, -148, + -66, -190, -79, -229, -31, -85, -4, 64, -72, 160, -32, 148, + -100, 242, -58, 104, 98, -4, 72, 132, 154, 52, 125, 188, + 23, 104, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -234, -694, + -257, -763, -229, -679, -319, -949, -117, -343, -4, 456, -464, 962, + -50, 492, -408, 1030, -230, 448, 686, -4, 295, 916, 625, 88, + 537, 804, 109, 448, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -84, -244, -140, -412, -182, -538, -238, -706, -116, -340, -4, 156, + -164, 428, -116, 464, -248, 708, -228, 444, 236, -4, 244, 316, + 418, 220, 454, 484, 108, 444, + }; + + cnn_config.layer_config[0].pad = PADDING_SAME_ZERO; + + RunCNNTest(image_width, image_height, input, expected_21_same, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); + + cnn_config.layer_config[0].pad = PADDING_VALID; + + RunCNNTest(image_width, image_height, input, expected_21_valid, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); +} + /* Single-layer convolution with a filter larger than the image: filter fields 23 and 20 on an 11-wide, 10-high image (one expected value), then filter_width 20, filter_height 23 with skip_width = skip_height = 1 on a 10-wide, 11-high image. */ +TEST_F(CNNTest, TestLargeKernelsAndStrides) { + float input_10x11[] = { + 4, 4, 2, 4, 2, -5, -2, 3, 
-1, 0, 0, 1, 2, 0, -5, -2, -5, 1, -3, + -1, 4, -3, 2, -2, 1, 0, 1, -3, -3, -4, -2, -2, 1, -4, -1, 4, 1, -4, + -4, -4, 3, 2, -5, 3, -5, 1, 2, -4, 1, -1, 3, 4, -2, 3, -3, 3, 0, + 2, -4, -5, -5, -2, -1, -2, 1, 1, 1, -2, 4, -5, 4, -1, -1, 2, 3, -4, + 2, 2, 3, 0, 0, 1, 0, 3, 2, 3, 1, -2, 3, -4, 3, 2, 4, -2, 0, + 4, -4, 1, -3, -3, -3, -5, 1, -3, -5, 0, 4, -1, -3, 2, + }; + + float weights_10x11[] = { + -3, 4, -4, -3, -5, 1, -2, 3, 1, -4, -4, 0, -1, 0, 3, 1, -3, -2, 0, + -1, 1, 3, -4, -4, -3, -3, -2, 4, 3, -5, 4, 2, -3, 4, -2, -1, 2, -1, + -5, 0, -3, 0, 3, -5, -5, 3, -4, -1, -5, 3, 4, 0, 4, -5, 2, -1, 2, + -1, -1, -1, -5, 0, -4, 3, -1, 1, 1, -1, 3, 2, -5, -4, 0, -4, 4, -5, + -3, 4, -5, 2, -5, -4, -4, -1, 3, 3, 0, 2, -4, 1, -2, 1, 1, 0, 3, + -2, 0, 1, 2, 4, -3, -1, -5, -5, 2, -4, 1, 1, 2, -4, -2, -2, 2, 1, + 3, 4, -5, 1, -1, -3, -3, -1, -2, -5, 1, -1, 0, 1, 4, 4, 0, 0, 4, + -3, -1, -5, -3, 0, 1, 1, 1, -5, 3, 4, 3, -5, 3, -2, -2, 0, -4, 0, + 0, -2, 1, -4, -1, 0, -5, -2, -2, -5, -3, -3, 1, 1, -3, 2, 4, 2, 4, + -4, -3, 3, 1, 1, 3, -4, 4, -2, -3, -3, -3, -3, -4, -2, 3, -5, 2, 4, + -1, -4, -4, 4, -2, -1, 3, -3, -4, -4, -2, 4, 1, 0, 2, -1, 4, -3, 1, + 4, -3, 4, 4, 0, -4, 3, -2, -3, 2, 3, -1, -3, 2, 1, 4, -2, -3, 1, + 4, -2, 2, -2, -5, -2, 1, 4, -1, -4, 4, -5, 2, -5, -4, -1, -2, 3, 1, + 2, 1, -5, 1, -5, -4, -1, -2, 2, -2, -4, -3, -2, -2, 4, -1, 2, 2, -4, + 2, -2, 4, -4, -2, -2, 1, -1, 1, 1, 1, -4, -5, -2, 3, -4, -1, 3, -2, + 3, 2, -5, -4, 0, 3, -2, -4, -5, 3, -2, -4, 2, -2, 1, -4, 0, 2, -5, + 1, -4, -1, -1, 4, -5, -4, 0, -5, -4, -3, -5, -4, 0, 2, 0, -4, 2, -2, + 1, 1, -3, 2, 0, -4, 0, -4, 1, 0, -5, -1, -1, -1, -5, 4, 2, 2, -4, + 3, -2, -2, 2, -3, -2, -1, 2, -4, -5, 2, -2, -4, -5, -5, -1, 2, -1, 0, + -5, -2, -2, -5, 0, 1, -1, -5, 0, 3, 2, 3, 0, -3, -2, 0, -5, -1, -2, + 2, -4, -1, 2, 2, -5, 2, -4, 0, 3, -3, 1, 0, 0, 1, -5, -3, 1, -1, + 0, -4, -3, 2, -4, -4, 4, -1, 0, 1, 2, -4, -5, 4, -2, 1, -4, -4, -3, + -1, -1, 1, -1, -4, -1, -4, -3, 2, -1, -2, -4, 1, 1, 0, -2, 0, -4, 3, + -3, 0, 
-4, -1, -4, 2, -1, -2, -5, -1, -2, -3, 3, -1, 0, -3, 0, 1, -5, + 1, -5, 0, 1, + }; + + float bias_10x11[] = { 3 }; + + float expected_10x11[] = { + 118, + }; + + CNN_CONFIG cnn_config = { 1, + 0, + 0, + 0, + 0, + { { + 1, + 23, + 20, + 1, + 15, + 20, + 0, + weights_10x11, + bias_10x11, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + } } }; + + int image_height = 10; + int image_width = 11; + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input_10x11, expected_10x11, + &cnn_config, image_width, &thread_data, MSE_INT_TOL); + + float input_11x10[] = { + -2, -2, 3, -5, -1, -3, 1, 3, 2, 1, 1, -5, 4, 1, 3, -5, 3, -3, -5, + 0, -1, -3, -3, 1, 1, -5, -1, -5, -5, -3, 0, 1, -3, -1, -3, -3, 0, 3, + 4, -4, -1, 3, -3, -1, -3, 1, -3, -2, -1, -4, -3, 2, -4, 1, -4, -1, -3, + -5, -1, 2, 3, 0, 2, 2, -5, 4, 1, 2, -1, -4, 4, -4, -4, 0, -1, 1, + -1, 1, -3, -3, -2, 1, 2, 4, 4, 4, -3, -3, 0, 1, 0, 1, 4, 1, 3, + 4, -3, -2, -4, 4, 2, 0, 3, 4, -1, 2, -2, 1, -3, -2, + }; + + float weights_11x10[] = { + 4, -1, 1, -1, 2, 4, 3, 3, -4, 3, -5, 1, -1, -1, -2, -2, 0, 2, -3, + -2, 3, -5, -1, 0, -1, -2, -2, -1, 2, 4, 3, 1, 0, 0, -3, 3, -4, -1, + -5, 4, -2, -2, 1, 2, -1, -3, 1, 2, -5, 1, -3, 3, 3, 0, -4, -4, -5, + -3, -4, -4, 4, -2, 4, 4, -2, 2, -5, -1, -2, -5, -1, 4, -3, 3, -2, 0, + -4, -3, 0, -1, -2, 4, 2, 0, -2, -5, -4, 1, 4, -4, -2, 2, -2, 1, 1, + -4, 1, -4, -4, -2, 4, 2, -1, -5, -5, 1, -3, -3, 3, -3, -5, -3, 4, -1, + -1, -3, 0, -4, 3, -1, 0, -2, 0, -5, -2, -5, 2, 0, -5, 2, 3, -2, 2, + 4, -1, 1, -3, 2, 3, 2, 0, -5, -4, -5, 2, 1, 1, -1, -2, 3, 4, 2, + -2, 4, -2, 3, 1, -4, -3, -1, 4, 4, -3, -5, -2, 2, 0, 3, -2, 3, -1, + -4, 0, -2, 0, 3, 4, -2, -3, -2, 0, 3, 4, 2, -4, 0, 1, 2, 2, -1, + -1, 4, 1, 4, -2, -1, -1, -5, 1, -3, 3, 3, -1, -4, 3, -5, 0, 0, -1, + -4, -1, -2, 4, -2, 3, 3, -3, 1, -1, 2, -1, 4, 4, -2, -2, 4, -2, 0, + 3, -3, -5, -1, -2, 4, -4, 2, -4, 0, -2, 3, -3, 2, 2, -2, -5, -1, 4, + 3, -2, -1, 3, 3, -1, 3, 0, 
-3, 0, 4, 2, 0, -1, 4, 1, 1, 2, 1, + 3, 1, 1, 1, -3, -5, -4, 4, -4, 2, 0, 0, -4, 1, 4, -5, 4, 4, 0, + 1, 0, -2, -4, -4, -3, 0, 1, -5, 4, 0, -3, -2, -4, 2, 4, 1, -5, 1, + -4, 1, 0, -3, -3, 0, 2, -5, 4, 3, -2, -5, 3, 1, -1, 0, 3, -2, -2, + 3, -2, -5, 4, 1, -2, 2, -1, 0, 4, 0, -5, 3, -2, 1, 2, 1, -5, -3, + -2, -5, 4, -4, 0, 3, 2, -1, -4, -1, 2, 1, -2, 3, -1, -4, 2, 0, -3, + 1, -1, 2, -5, -4, -1, -5, 1, 4, 3, 4, 2, -3, 1, -5, -1, 3, 0, -1, + -4, 3, 4, -5, 4, 4, -3, 2, -3, -1, -3, -5, -3, 2, -3, -2, 1, 1, 0, + -5, 3, 2, 1, -5, 1, 1, 1, 3, 4, -4, -1, -2, 0, -5, -3, -5, -2, -4, + 3, 3, 3, 4, 0, -4, -1, -5, 0, -3, 1, 4, 4, -4, 4, -5, -5, -1, -2, + -5, 3, -4, 4, 3, 0, -3, 2, -2, 0, 0, 4, 4, 0, -2, 1, -1, -3, 2, + -1, 1, -3, -5, + }; + + float bias_11x10[] = { + -5, + }; + + float expected_11x10[] = { + 36, -84, 95, 45, 18, 46, 77, -54, -99, -149, 66, 49, 161, 11, + 39, 61, -66, 61, 4, -3, 34, -44, -23, 31, 64, 29, 47, 72, + -27, -27, 121, -3, 100, 1, 30, -78, -12, -89, -59, 8, -16, 112, + 91, -102, -26, -4, 30, 54, 4, -84, -24, -58, 27, -53, -33, 5, + 53, -26, 63, 50, -103, -130, -23, 6, -104, -207, 73, 23, 77, 132, + 38, 32, -130, -44, -60, 7, 27, 176, 45, -32, -2, 99, -97, 63, + 69, 126, 47, 63, 136, -57, 5, 16, -40, -157, 8, 38, -44, -10, + 91, 7, 122, 140, 30, -105, 4, -1, 113, 64, 180, 141, + }; + + cnn_config.layer_config[0].weights = weights_11x10; + cnn_config.layer_config[0].bias = bias_11x10; + cnn_config.layer_config[0].filter_width = 20; + cnn_config.layer_config[0].filter_height = 23; + cnn_config.layer_config[0].skip_width = 1; + cnn_config.layer_config[0].skip_height = 1; + image_height = 11; + image_width = 10; + + RunCNNTest(image_width, image_height, input_11x10, expected_11x10, + &cnn_config, image_width, &thread_data, MSE_INT_TOL); +} + /* Runs one SOFTSIGN layer (filter_width 4, filter_height 5) over an 8x8 image with PADDING_SAME_ZERO, PADDING_SAME_REPLICATE and PADDING_VALID in turn, each checked against its own expected array using MSE_FLOAT_TOL. */ +TEST_F(CNNTest, TestSoftsignSingleLayer) { + int image_width = 8; + int image_height = 8; + int filter_height = 5; + int filter_width = 4; + float input[] = { + -0.5220f, 0.8410f, -0.8990f, -0.0090f, 0.6710f, -0.9470f, 
-0.8240f, + -0.0870f, 0.5380f, 0.4750f, 0.570f, -0.3760f, -0.6960f, -0.5940f, + -0.3830f, 0.080f, -0.0980f, -0.4940f, -0.4030f, 0.9460f, -0.6020f, + 0.4220f, 0.6190f, 0.6640f, -0.9210f, -0.1470f, -0.2480f, -0.1120f, + -0.580f, -0.0650f, 0.3330f, 0.9860f, -0.7430f, 0.7610f, 0.4840f, + 0.1030f, 0.9570f, 0.6120f, -0.5240f, -0.1220f, -0.5850f, -0.270f, + 0.7840f, -0.9790f, 0.7290f, -0.30f, -0.6460f, 0.0780f, 0.4750f, + -0.0510f, 0.4550f, 0.3850f, -0.7230f, 0.4460f, -0.6260f, -0.810f, + 0.8720f, -0.2120f, -0.580f, -0.9510f, -0.8430f, -0.1340f, -0.0850f, + 0.9190f, + }; + float expected_same[] = { + 0.430f, 0.660f, 0.5510f, -0.610f, 0.450f, -0.1610f, 0.0520f, 0.3240f, + 0.6820f, 0.3820f, 0.6360f, 0.7480f, 0.3080f, 0.090f, 0.3910f, 0.1730f, + 0.340f, 0.6660f, -0.4990f, 0.4280f, 0.1540f, 0.120f, 0.4670f, 0.6150f, + -0.3880f, 0.7590f, 0.4190f, 0.7350f, 0.5310f, -0.5160f, -0.1760f, 0.6790f, + -0.6780f, 0.5470f, 0.5750f, -0.6420f, 0.7210f, -0.4620f, 0.5430f, 0.770f, + -0.1990f, 0.3950f, 0.7860f, -0.4380f, 0.7540f, 0.2640f, -0.6430f, 0.4510f, + -0.1260f, 0.1590f, -0.2110f, -0.0560f, 0.6570f, 0.680f, 0.5870f, 0.4720f, + 0.4040f, 0.3630f, 0.670f, 0.2360f, 0.410f, 0.6980f, -0.5350f, 0.3940f, + }; + float expected_replicate[] = { + 0.540f, 0.7230f, -0.3530f, -0.2130f, 0.7440f, -0.4470f, -0.6260f, + -0.2050f, 0.7230f, 0.4630f, 0.5920f, 0.7440f, 0.6080f, 0.3130f, + -0.5670f, -0.4720f, 0.5480f, 0.6660f, -0.4990f, 0.4280f, 0.1540f, + 0.120f, 0.3390f, 0.6090f, 0.4160f, 0.7590f, 0.4190f, 0.7350f, + 0.5310f, -0.5160f, -0.490f, 0.4450f, -0.610f, 0.5470f, 0.5750f, + -0.6420f, 0.7210f, -0.4620f, 0.3150f, 0.7370f, -0.5820f, 0.3950f, + 0.7860f, -0.4380f, 0.7540f, 0.2640f, -0.7430f, -0.5340f, -0.6270f, + 0.4430f, 0.4730f, 0.4570f, 0.7450f, 0.630f, 0.2620f, 0.3140f, + -0.1840f, 0.1810f, 0.7210f, 0.2760f, 0.6430f, 0.6720f, -0.4390f, + 0.2040f, + }; + float expected_valid[] = { + 0.6660f, -0.4990f, 0.4280f, 0.1540f, 0.120f, 0.7590f, 0.4190f, + 0.7350f, 0.5310f, -0.5160f, 0.5470f, 0.5750f, 
-0.6420f, 0.7210f, + -0.4620f, 0.3950f, 0.7860f, -0.4380f, 0.7540f, 0.2640f, + }; + float weights[] = { + 0.6210f, 0.3710f, -0.2770f, -0.7230f, -0.2450f, 0.6770f, 0.3080f, + -0.9880f, -0.080f, 0.7190f, -0.6760f, -0.0170f, -0.8970f, 0.8260f, + 0.7390f, -0.4550f, -0.4260f, -0.6330f, 0.0880f, -0.9390f, + }; + float bias[] = { + 0.750f, + }; + + CNN_CONFIG cnn_config = { 1, + 0, + 0, + 0, + 0, + { { + 1, + filter_width, + filter_height, + 1, + 1, + 1, + 0, + weights, + bias, + PADDING_SAME_ZERO, + SOFTSIGN, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + } } }; + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected_same, &cnn_config, + image_width, &thread_data, MSE_FLOAT_TOL); + + cnn_config.layer_config[0].pad = PADDING_SAME_REPLICATE; + + RunCNNTest(image_width, image_height, input, expected_replicate, &cnn_config, + image_width, &thread_data, MSE_FLOAT_TOL); + + cnn_config.layer_config[0].pad = PADDING_VALID; + + RunCNNTest(image_width, image_height, input, expected_valid, &cnn_config, + image_width, &thread_data, MSE_FLOAT_TOL); +} + /* Six-entry layer config: entry 1 uses BRANCH_INPUT and entry 4 uses BRANCH_ADD; the shared weights/bias arrays are attached to the layers afterwards by AssignLayerWeightsBiases(). */ +TEST_F(CNNTest, TestBranchTensorAdd) { + int filter_width = 2; + int filter_height = 3; + + int image_width = 4; + int image_height = 4; + + float input[] = { + -3, -2, -2, 0, -1, 3, 2, -2, 1, 3, 4, 0, 2, -5, -4, 0, + }; + + float weights[] = { + -3, -1, 4, -1, -3, 3, 3, 0, 2, 0, 3, 2, 4, 4, 4, -5, 1, -4, + 2, -4, 1, -3, 0, 4, -5, 4, 0, -4, -3, -1, 0, 0, -2, 0, 0, 2, + -5, -1, 1, -3, 3, 4, 3, 0, 1, -1, 1, 1, 2, 4, -2, -5, 2, -2, + 3, -2, 4, -1, 0, 2, 3, 2, -2, -1, -3, 1, 3, 4, -1, -3, 0, -4, + 4, 2, -3, -3, -1, 0, 1, 0, 3, 3, -3, 0, 3, 2, -5, -3, 4, -5, + 3, -1, -1, -3, 0, 1, -1, -4, 2, 4, -1, 4, -1, 1, 3, 4, 4, 4, + 0, -1, -3, -3, -3, -3, 2, -3, -2, 2, 3, -3, + }; + + float bias[] = { + 3, 4, -1, -1, 2, 1, -2, 1, 4, 1, 3, + }; + + float expected[] = { + -11502, -4101, -3424, 668, -17950, -5470, -5504, 626, + 4835, 446, 1779, -3483, 3679, -4214, 4578, -105, + }; + + 
int channels = 2; + + CNN_CONFIG cnn_config = { 6, + 0, + 0, + 0, + 0, + { { + 1, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + weights, + bias, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_INPUT, + BRANCH_NOC, + { + 0x02, + 0, + 0x00, + }, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 1, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 1, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_ADD, + { + 0x00, + 0, + 0x02, + }, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + 1, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + } } }; + + // Weights and biases need to be specified separately because + // of the offset. 
+ AssignLayerWeightsBiases(&cnn_config, weights, bias); + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); +} + /* Six-entry layer config: entry 1 uses BRANCH_INPUT, entry 4 uses BRANCH_CAT, and the final entry's first field is channels + channels (presumably its input channel count after concatenation -- TODO confirm). */ +TEST_F(CNNTest, TestBranchTensorConcatenation) { + int filter_width = 2; + int filter_height = 3; + + int image_width = 4; + int image_height = 4; + + float input[] = { + -3, -2, -2, 0, -1, 3, 2, -2, 1, 3, 4, 0, 2, -5, -4, 0, + }; + + float weights[] = { + 3, 0, 2, 0, 2, 3, 1, -3, 1, -5, -3, 0, -4, 4, 0, -5, 0, -5, -1, + -2, -5, 0, -3, 2, -4, 2, 0, 2, -1, 0, -4, 3, 0, 0, -1, -5, 2, -1, + 4, -4, -2, -3, -3, 3, 4, -2, -1, -4, -1, 4, 4, -1, 4, 3, -4, 2, -2, + -4, -3, -2, 3, -3, -5, -1, 3, -2, 4, 1, -4, -3, -5, -5, -3, 4, -2, -2, + -1, -5, -5, 0, -1, -2, -3, 3, -4, -5, 2, -3, 1, 0, -5, 2, 2, -2, 0, + 2, 2, -2, 4, 2, 2, 0, 1, -5, -3, 0, 2, -2, 1, 2, -5, 2, 3, 3, + -1, 3, 0, -3, 3, -4, -4, 3, 3, -4, -2, 2, -2, 2, -2, -1, 3, 0, + }; + + float bias[] = { + -3, -5, 4, -4, -3, -2, 0, 3, -4, 4, -3, + }; + + float expected[] = { + -33533, -32087, -6741, -2124, 39979, 41453, 14034, 689, + -22611, -42203, -14882, -239, 15781, 15963, 9524, 837, + }; + + int channels = 2; + + CNN_CONFIG cnn_config = { 6, + 0, + 0, + 0, + 0, + { { + 1, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + weights, + bias, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_INPUT, + BRANCH_NOC, + { + 0x02, + 0, + 0x00, + }, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 1, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 1, + BRANCH_NO_COPY, + 
BRANCH_NOC, + {}, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_CAT, + { + 0x00, + 0, + 0x02, + }, + {}, + -1, + }, + { + channels + channels, + filter_width, + filter_height, + 1, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + } } }; + + // Weights and biases need to be specified separately because + // of the offset. + AssignLayerWeightsBiases(&cnn_config, weights, bias); + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); +} + +// TODO(logangw): Add test to test all combinations of branch_copy_type. + /* Ten-entry layer config that mixes a BRANCH_INPUT copy and a BRANCH_OUTPUT copy with three BRANCH_ADD combines (entries 4, 7 and 8). */ +TEST_F(CNNTest, TestBranchCombinations) { + int filter_width = 2; + int filter_height = 3; + + int image_width = 4; + int image_height = 4; + + float input[] = { + 3, 2, -5, -4, 4, -2, -4, -3, 4, 2, -3, 2, -3, 1, -5, -1, + }; + + float weights[] = { + 2, 3, 0, 4, 4, 3, 1, 0, 1, -5, 4, -3, 3, 0, 4, -1, -1, -5, + 2, 1, -3, -5, 3, -1, -3, -2, 0, -2, 3, 0, -2, -4, -2, -2, 2, -5, + 4, -5, 0, 1, -5, -4, -3, -4, 2, -2, 1, 0, 3, -2, -4, 3, 4, -4, + -1, -1, -3, -2, -2, -1, 2, 0, 2, -1, 2, -4, -4, -1, 2, 0, 3, -2, + -2, 3, -3, 4, -2, 4, 3, 4, 1, 0, -2, -3, -5, 1, -3, 2, 0, -2, + -2, -1, -1, -5, -2, -3, -1, 3, 3, 4, 4, 0, 2, 1, 3, -3, 2, -5, + -5, 1, -5, -1, 3, 3, 2, -4, -1, 3, -4, -2, -5, -2, 1, 3, 2, 2, + -5, -2, -3, -1, -2, -4, -1, -2, 2, 1, -4, -4, 2, 0, 2, 0, 2, -3, + -2, -4, 4, 0, 1, -3, -5, 4, -1, 2, 3, -5, -1, 0, 4, -1, -1, 3, + -1, -3, 3, 1, 4, 3, 4, 3, -4, -5, -1, 3, 3, -4, 3, 1, 3, -5, + 3, 4, -5, 4, 2, -1, -5, 2, 1, 0, 4, 0, -3, 2, 0, 2, -2, 1, + -1, -2, -1, -5, 4, 3, 3, -2, 2, 4, -5, -5, -3, -2, 4, 0, -4, 1, + }; + + float bias[] = { + -1, 4, 0, 2, 2, -2, 0, -4, -5, -1, 1, -2, 3, 0, 4, -2, 1, 0, 0, + }; + + float expected[] = { + 149496, 
15553, -24193, -20956, 134094, 86432, -68283, -6366, + -53031, 133739, 67407, -13539, -53205, -58635, -20033, 1979, + }; + + int channels = 2; + + CNN_CONFIG cnn_config = { 10, + 0, + 0, + 0, + 0, + { + { + 1, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + weights, + bias, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_INPUT, + BRANCH_NOC, + { + 0x06, + 0, + 0x00, + }, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 2, + BRANCH_OUTPUT, + BRANCH_NOC, + { + 0x08, + 0, + 0x00, + }, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 3, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 2, + BRANCH_NO_COPY, + BRANCH_ADD, + { + 0x00, + 0, + 0x08, + }, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 2, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 1, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 1, + BRANCH_NO_COPY, + BRANCH_ADD, + { + 0x00, + 0, + 0x0C, + }, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + channels, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_ADD, + { + 
0x00, + 0, + 0x02, + }, + {}, + -1, + }, + { + channels, + filter_width, + filter_height, + 1, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + }, + } }; + + // Weights and biases need to be specified separately because + // of the offset. + AssignLayerWeightsBiases(&cnn_config, weights, bias); + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); +} + /* Three-entry layer config: entry 0 uses BRANCH_OUTPUT with branch config { 0x02, 2, 0x00 }, entry 1 combines with BRANCH_CAT, and entry 2 is a plain BRANCH_NO_COPY / BRANCH_NOC layer. */ +TEST_F(CNNTest, TestSplittingTensors) { + int filter_width = 2; + int filter_height = 3; + + int image_width = 4; + int image_height = 4; + + float input[] = { + -1, -1, 2, 1, 3, 2, 4, -3, -4, -2, 2, -3, 1, -3, 4, -2, + }; + + float weights[] = { + -4, 1, 0, 2, 3, 4, 4, -4, -5, -3, 2, 2, -4, -3, 3, 2, + 4, -4, -3, -4, -4, 1, -3, -5, -3, 4, 2, -2, 2, -1, -4, -1, + -2, -3, 1, 1, 0, -5, -1, 3, 3, -5, -3, 0, -3, 1, -3, -1, + 1, -3, -2, -2, 4, -2, 0, 1, 2, 2, -4, 2, 4, 0, -5, -2, + 4, 4, -5, 1, 0, 2, -2, -5, -5, -3, -5, -5, 4, -3, 0, 0, + -4, -4, 0, -5, -4, 0, 0, -3, -5, -3, -1, 2, -1, 4, -1, 2, + }; + + float bias[] = { + -4, -2, -3, -3, 3, 1, -2, + }; + + float expected[] = { + 530, -762, 1469, 777, 849, -771, -1698, 600, + -658, -1821, 98, -668, -1798, 30, 887, -971, + }; + + CNN_CONFIG cnn_config = { 3, + 0, + 0, + 0, + 0, + { + { + 1, + filter_width, + filter_height, + 4, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_OUTPUT, + BRANCH_NOC, + { + 0x02, + 2, + 0x00, + }, + {}, + -1, + }, + { + 4, + filter_width, + filter_height, + 2, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_CAT, + { + 0x00, + 0, + 0x02, + }, + {}, + -1, + }, + { + 4, + filter_width, + filter_height, + 1, + 1, + 1, + 0, + nullptr, + nullptr, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + }, + } }; + + // Weights and biases 
need to be specified separately because + // of the offset. + AssignLayerWeightsBiases(&cnn_config, weights, bias); + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected, &cnn_config, + image_width, &thread_data, MSE_INT_TOL); +} + /* Feeds all-zero input, weights and bias through a three-entry config (BRANCH_INPUT, then two BRANCH_CAT entries); expected[] holds 28 zeros for the 2x2 image, i.e. 7 values per pixel. */ +TEST_F(CNNTest, TestOutputChannelsCount) { + int filter_width = 1; + int filter_height = 1; + + int image_width = 2; + int image_height = 2; + + float input[] = { 0, 0, 0, 0 }; + + float weights[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + + float bias[] = { 0, 0, 0, 0, 0, 0 }; + + float expected[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + CNN_CONFIG cnn_config = { 3, + 0, + 0, + 0, + 0, + { + { + 1, + filter_width, + filter_height, + 2, + 1, + 1, + 0, + weights, + bias, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_INPUT, + BRANCH_NOC, + { + 0x06, + 0, + 0x00, + }, + {}, + -1, + }, + { + 1, + filter_width, + filter_height, + 2, + 1, + 1, + 0, + weights, + bias, + PADDING_SAME_ZERO, + NONE, + 0, + 2, + BRANCH_NO_COPY, + BRANCH_CAT, + { + 0x00, + 0, + 0x03, + }, + {}, + -1, + }, + { + 2, + filter_width, + filter_height, + 2, + 1, + 1, + 0, + weights, + bias, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_CAT, + { + 0x00, + 0, + 0x04, + }, + {}, + 0, + }, + } }; + + // Weights and biases need to be specified separately because + // of the offset. 
+ AssignLayerWeightsBiases(&cnn_config, weights, bias); + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected, &cnn_config, + image_width, &thread_data, MSE_FLOAT_TOL); +} + +TEST_F(CNNTest, TestBatchNorm) { + int image_width = 28; + int image_height = 28; + int filter_height = 7; + int filter_width = 7; + float input[] = { + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0117647f, 0.0705882f, 0.0705882f, 0.0705882f, + 0.494118f, 0.533333f, 0.686275f, 0.101961f, 0.65098f, 1.0f, + 0.968627f, 0.498039f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.117647f, 0.141176f, 0.368627f, 0.603922f, + 0.666667f, 0.992157f, 0.992157f, 0.992157f, 0.992157f, 0.992157f, + 0.882353f, 0.67451f, 0.992157f, 0.94902f, 0.764706f, 0.25098f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.192157f, + 0.933333f, 0.992157f, 0.992157f, 0.992157f, 0.992157f, 0.992157f, + 0.992157f, 0.992157f, 0.992157f, 0.984314f, 0.364706f, 0.321569f, + 0.321569f, 0.219608f, 0.152941f, 0.0f, 0.0f, 
0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0705882f, 0.858824f, 0.992157f, + 0.992157f, 0.992157f, 0.992157f, 0.992157f, 0.776471f, 0.713725f, + 0.968627f, 0.945098f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.313725f, 0.611765f, 0.419608f, 0.992157f, + 0.992157f, 0.803922f, 0.0431373f, 0.0f, 0.168627f, 0.603922f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.054902f, 0.00392157f, 0.603922f, 0.992157f, 0.352941f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.545098f, 0.992157f, 0.745098f, 0.00784314f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0431373f, + 0.745098f, 0.992157f, 0.27451f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.137255f, 0.945098f, + 0.882353f, 0.627451f, 0.423529f, 0.00392157f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.317647f, 0.941176f, 0.992157f, + 0.992157f, 0.466667f, 0.0980392f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.176471f, 0.729412f, 0.992157f, 0.992157f, + 0.588235f, 0.105882f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0627451f, 0.364706f, 0.988235f, 0.992157f, 0.733333f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 
0.0f, + 0.0f, 0.976471f, 0.992157f, 0.976471f, 0.25098f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.180392f, 0.509804f, 0.717647f, 0.992157f, + 0.992157f, 0.811765f, 0.00784314f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.152941f, 0.580392f, + 0.898039f, 0.992157f, 0.992157f, 0.992157f, 0.980392f, 0.713725f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0941176f, 0.447059f, 0.866667f, 0.992157f, 0.992157f, 0.992157f, + 0.992157f, 0.788235f, 0.305882f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0901961f, 0.258824f, 0.835294f, 0.992157f, + 0.992157f, 0.992157f, 0.992157f, 0.776471f, 0.317647f, 0.00784314f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0705882f, 0.670588f, + 0.858824f, 0.992157f, 0.992157f, 0.992157f, 0.992157f, 0.764706f, + 0.313725f, 0.0352941f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.215686f, 0.67451f, 0.886275f, 0.992157f, 0.992157f, 0.992157f, + 0.992157f, 0.956863f, 0.521569f, 0.0431373f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.533333f, 0.992157f, + 0.992157f, 0.992157f, 0.831373f, 0.529412f, 0.517647f, 0.0627451f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 
0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f + }; + float expected[] = { + -0.836424f, -0.857365f, -1.62739f, -1.62739f, -0.836424f, 5.40742f, + 0.920853f, -0.692567f, -0.836424f, -0.534405f, -1.62739f, -0.836424f, + 1.32602f, 1.36312f, 0.112766f, -0.836424f, -0.192962f, 1.56975f, + 2.45777f, 0.944414f, -0.192962f, -1.5519f, -1.5519f, -0.554006f, + -0.192962f, 1.4231f, -1.5519f, -0.192962f, 1.3661f, -1.5519f, + -1.5519f, -0.192962f, -0.843708f, -0.359025f, -0.843708f, -0.843708f, + -0.843708f, 4.53065f, 0.0429584f, -0.796804f, -0.843708f, 0.3473f, + -0.843708f, -0.843708f, -0.114439f, 3.14817f, 0.0811934f, -0.843708f + }; + float kernel[] = { + 0.119643f, -0.237864f, 0.0462892f, 0.0502297f, -0.0134528f, + 0.146347f, 0.153133f, 0.0513307f, 0.0752369f, 0.0135557f, + -0.111434f, 0.0941854f, 0.0788362f, 0.0299412f, 0.111762f, + 0.144066f, 0.00431504f, -0.0177954f, 0.0738092f, -0.0344215f, + 0.0832582f, 0.053989f, -0.112691f, 0.0962145f, 0.0186525f, + -0.00660205f, -0.111962f, -0.126801f, -0.231625f, 0.17309f, + 0.0748875f, -0.179569f, -0.00513812f, -0.156579f, -0.147322f, + 0.184168f, 0.189308f, -0.200359f, -0.0156733f, 0.140649f, + 0.0858496f, -0.0263217f, -0.0740749f, -0.112563f, 0.107528f, + 0.0609729f, -0.221625f, 0.0769944f, -0.00900815f, -0.00136441f, + -0.0236521f, -0.0418025f, -0.00286299f, 0.12241f, 0.0964093f, + -0.0150897f, 0.0532171f, 0.0625916f, 0.116939f, 0.118024f, + 0.161918f, -0.00909767f, 0.100897f, -0.054563f, -0.175179f, + -0.0687892f, 0.00734235f, 0.109833f, -0.113776f, 0.0595405f, + -0.170255f, 0.0124815f, -0.0363301f, -0.0127038f, 0.0445554f, + -0.0729894f, 0.107428f, -0.0341417f, 0.132619f, 0.00984557f, + -0.00443654f, 0.202929f, 0.0945134f, 0.0148725f, 0.00998574f, + -0.0226449f, 0.0478197f, -0.0793442f, 0.0707599f, -0.084225f, + 0.0865795f, 0.071104f, -0.047894f, 0.0838322f, 0.0635493f, + 
-0.00370265f, -0.157247f, -0.0289622f, -0.0590963f, 0.13207f, + 0.00468011f, -0.0345372f, 0.217939f, 0.18861f, -0.0290393f, + -0.0440664f, 0.0126197f, -0.129132f, -0.124943f, 0.0968156f, + -0.0853643f, -0.182305f, 0.00461618f, -0.147095f, -0.230282f, + 0.00856019f, 0.0278893f, -0.0300229f, 0.0417871f, 0.0804717f, + -0.0768571f, -0.0397085f, -0.0601096f, 0.100901f, -0.0184926f, + 0.0350673f, 0.0971094f, -0.0171837f, -0.289644f, -0.0899041f, + 0.08998f, -0.160319f, -0.0195103f, 0.0392167f, -0.137864f, + -0.0136294f, 0.0330886f, -0.0409244f, -0.092533f, -0.0427934f, + -0.191144f, -0.0969461f, 0.112035f, 0.138611f, 0.128717f, + 0.191184f, 0.197462f + }; + float bias[] = { 0.186703f, 0.204358f, -0.0230452f }; + + float bn_gamma[] = { 1.32173f, 1.26171f, 1.21966f }; + float bn_beta[] = { -0.232595f, -0.222652f, -0.232209f }; + float bn_mean[] = { 0.329233f, 0.199894f, 0.12389f }; + float bn_std[] = { 0.311986f, 0.189737f, 0.247104f }; + + CNN_BATCHNORM_PARAMS bn_params = { + bn_gamma, + bn_beta, + bn_mean, + bn_std, + }; + + CNN_CONFIG cnn_config = { + 1, + 0, + 0, + 0, + 0, + { + { + 1, + filter_width, + filter_height, + 3, + 7, + 7, + 0, + kernel, + bias, + PADDING_VALID, + RELU, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + bn_params, + 0, + }, + }, + }; + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected, &cnn_config, + image_width, &thread_data, MSE_FLOAT_TOL); +} + +TEST_F(CNNTest, TestMultithreading) { + int image_height = 2; + int image_width = 2; + int filter_height = 3; + int filter_width = 3; + + float input[] = { + -2, + 4, + 1, + 0, + }; + + float weights[] = { + -4, 2, -2, 0, -4, 4, -3, -3, -3, -1, 1, 0, -5, -3, 0, -5, 0, 0, + -1, 0, 2, -5, 0, 1, 4, 2, 1, 0, -2, -1, -5, -3, 2, -2, 1, -5, + }; + + float bias[] = { + -4, + -3, + -2, + 3, + }; + + float expected[] = { + 2, 10, -8, -17, -24, 5, -15, 6, -5, -5, 7, -10, 4, 13, 9, -14, + }; + + CNN_CONFIG cnn_config = { + 1, + 0, + 0, + 0, + 0, + { + { + 
1, + filter_width, + filter_height, + 4, + 1, + 1, + 0, + weights, + bias, + PADDING_SAME_ZERO, + NONE, + 0, + 0, + BRANCH_NO_COPY, + BRANCH_NOC, + {}, + {}, + 0, + }, + }, + }; + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + RunCNNTest(image_width, image_height, input, expected, &cnn_config, + image_width, &thread_data, MSE_FLOAT_TOL); + + const AVxWorkerInterface *const winterface = aom_get_worker_interface(); + AVxWorker workers[4]; + + for (int i = 0; i < 4; ++i) { + winterface->init(&workers[i]); + } + + thread_data = { 4, workers }; + + RunCNNTest(image_width, image_height, input, expected, &cnn_config, + image_width, &thread_data, MSE_FLOAT_TOL); + + for (int i = 0; i < 4; ++i) { + winterface->end(&workers[i]); + } +} + +TEST_F(CNNTest, TestMultiOutput) { + const int image_dim = 8; + const int image_ch = 3; + const int filter_dim = 2; + const int stride = 2; + const int num_filters = 2; + + const float input_[] = { + 1.7537929121f, 0.134331551012f, 0.123580039877f, 0.957731845246f, + 0.391006834217f, 1.00699352042f, -0.778177955829f, -0.814166433059f, + -0.656374394915f, 0.321967305228f, -2.19455719176f, 0.708035038966f, + 0.409148822266f, -0.318254408902f, 0.152450211189f, -0.250210793369f, + 0.826811563186f, 1.6804156584f, 0.273626975978f, 0.437936241887f, + -0.329935520167f, -0.288761611645f, 0.156937008304f, 0.271054157295f, + -0.0224828854332f, 1.70110336895f, -0.989066699309f, 1.30863131729f, + -0.165813705702f, 0.00380178619265f, -0.0837342367587f, 0.760954783156f, + -0.413610373524f, 1.17968204175f, 0.720295719536f, 0.308718974472f, + -1.10091337671f, 0.693160033687f, -0.0202862320697f, 1.0221927503f, + -1.24521801881f, -0.478501952308f, -1.71648619442f, -0.182571723636f, + 0.339292649504f, 2.0806519131f, 0.967974033444f, 0.175248672328f, + 0.0658124561472f, 0.795504169496f, 0.750592557361f, -1.46631013249f, + -1.79052846838f, -1.03672179515f, -0.841985521653f, 1.20995011489f, + 0.140859718215f, -0.651552622661f, 0.451065110806f, 1.1189443693f, 
+ 0.100213260593f, -0.834076868118f, -1.28734321611f, 1.22064420095f, + -0.364143084361f, 0.750961509335f, -0.888689074553f, -0.8253547106f, + -1.21800999027f, -0.966670603566f, 1.37384014741f, 0.47281264834f, + -0.420416235531f, 0.520163906493f, 0.501296589423f, 1.53418976951f, + 0.715234751485f, 0.644551588907f, 0.0763504863375f, -0.0018541943723f, + 0.322853189656f, -0.795099723224f, -0.125177096675f, 1.4476577471f, + -0.585888410088f, -1.44391754955f, -0.610543221933f, -0.221859179799f, + 0.252060200774f, -0.86287169623f, -0.0350246229157f, 1.0932311997f, + 0.899464648842f, -0.468806951704f, -0.300861137168f, 1.15776414206f, + 1.03268544738f, -0.171579585622f, -0.179136557119f, -0.354091003368f, + -0.612298249394f, -1.20237379258f, 1.54604109659f, 0.130664370287f, + 0.885225111868f, 1.0362799581f, 0.980561720868f, -0.619379186999f, + -1.33818929924f, -0.237233737961f, -1.89335425073f, 0.567821011321f, + 0.862420368465f, -1.37380916821f, 0.352190056666f, 0.611261516274f, + 0.393237747152f, 0.894686247967f, 0.190405182149f, 0.264872662911f, + -0.0657009133797f, 0.0580512653493f, -0.401825294366f, 0.4106081318f, + 0.49484512188f, -0.0751103149442f, -1.43243736382f, 1.79855656009f, + -1.1075351975f, 0.000354882733011f, -0.950716438608f, 1.27129831688f, + 1.00495189838f, 0.110358656713f, 1.08315032822f, -0.972676676218f, + -0.0757668962831f, 1.88932045165f, -0.0672638136275f, 0.425913010161f, + -0.781540372017f, 0.976000248609f, 0.687218504122f, 1.31374513445f, + -0.932658930672f, -1.25339468479f, 0.422071294078f, -0.24189927912f, + 0.216906604642f, -1.88720997548f, 1.99252872889f, 0.353943735777f, + 0.737434784132f, -1.17848645017f, 1.70424254896f, 0.775297112968f, + -0.516392797501f, 0.398130609129f, 0.737248101457f, 0.166282500886f, + 1.24699015468f, 0.47116183125f, 1.19091180182f, -0.372695424578f, + 0.219773209389f, -0.829467838962f, -0.52533122724f, 1.98707754595f, + 0.553692606972f, -0.933228902369f, 1.55427751643f, -1.08813399144f, + -0.325686682094f, 
0.205091443796f, -1.70381666435f, 0.466465327942f, + 1.73126863447f, -0.939133672634f, 1.48318077459f, -0.599414038168f, + -1.1583078687f, 0.518116190201f, 0.133571482458f, 0.84958342672f, + 1.02205000597f, -0.0772082009087f, -1.69567503859f, 1.4697939436f, + 1.67813743122f, -0.627911582938f, 0.131380509137f, -1.35717850726f, + }; + const float *input[3] = { input_, &input_[image_dim * image_dim], + &input_[2 * image_dim * image_dim] }; + + const float bias[] = { 0.0f, 0.0f }; + + const float weights_1[] = { + -0.489547413618f, 0.141916424749f, -0.279286485585f, -0.115322211094f, + 0.299572786936f, 0.205289980785f, -0.536254480088f, -0.253626313744f, + -0.422883815849f, -0.169702966298f, -0.540104704793f, 0.495319646763f, + 0.298799079422f, -0.10054550901f, -0.306085047056f, 0.171061886165f, + -0.108058703878f, -0.410734629888f, -0.0640674673049f, -0.386524840979f, + -0.157203423678f, -0.362138920529f, -0.216206085209f, 0.147502517971f, + }; + + const float weights_2[] = { + 0.207580604357f, 0.480821146263f, -0.29111909562f, 0.47422567493f, + 0.206892553253f, -0.235067084092f, 0.354516800602f, -0.212399370252f, + -0.419071343731f, -0.050350731631f, -0.0516457320279f, -0.0359310500731f, + 0.567044864811f, -0.060341127522f, 0.0501464839637f, -0.437785677916f, + }; + + const float weights_3[] = { + -0.0690452401448f, -0.356657338763f, -0.219464031809f, 0.551288365843f, + 0.181372090853f, -0.00245268542109f, 0.409000696276f, -0.593209108763f, + 0.587352566749f, -0.243720660227f, 0.266232713887f, -0.00439285245097f, + 0.252883228305f, 0.152646192631f, 0.0918944932026f, 0.398853715057f, + }; + + const float weights_4[] = { + 0.207560791573f, 0.194201350401f, 0.227802322443f, 0.206533663345f, + 0.0557331066805f, 0.0224159800424f, -0.143939197467f, -0.27703361602f, + 0.130643888389f, -0.269456557461f, 0.186242862864f, -0.162879944774f, + -0.145503996718f, -0.0768822987581f, -0.203127976359f, -0.238119922873f, + -0.258806479994f, 0.0357957680385f, -0.1027606976f, 
-0.287920082345f, + 0.189047820993f, 0.250711538481f, -0.272815714175f, -0.0431449742024f, + 0.207261230996f, -0.0396472677451f, 0.131236557412f, 0.174291832499f, + -0.251515885765f, -0.107164007499f, 0.185824534748f, -0.00561585838161f, + 0.273393799578f, -0.139563699075f, -0.263922456031f, -0.118859844081f, + 0.109230982597f, -0.170170294794f, 0.0123025648515f, -0.0839368964355f, + -0.0774058234297f, 0.255847138286f, -0.208430879637f, 0.279170114319f, + -0.272890330712f, -0.217725903006f, -0.295923275459f, -0.17008723953f, + -0.284281803405f, 0.281406323629f, 0.266910044663f, -0.209963914338f, + 0.271980962964f, 0.142013581699f, -0.143896509026f, -0.290509242975f, + -0.305768180935f, 0.196902832117f, -0.090424189662f, -0.147460802346f, + 0.217722016651f, 0.12353848977f, -0.169177363577f, -0.0454230918512f, + }; + + const float expected_0[] = { + -2.04858441055f, -2.12883075791f, -0.045177363807f, 0.763949675768f, + -0.544361512821f, -1.58123168032f, 1.89319847039f, 0.16859080901f, + -1.16023321135f, -0.396988107751f, 1.76637090744f, -1.40434786514f, + 0.908227575669f, 0.817064817605f, 0.215631134908f, -0.848605613428f, + -0.106756747018f, 0.0193027166685f, 0.801345615113f, -0.395407237598f, + -1.79983795658f, -1.73054496242f, 0.0584392594454f, -0.388786095569f, + -0.237269619354f, 0.000843578271263f, -1.24043512104f, 0.487839445893f, + -0.394259726605f, 0.559632843424f, -0.527224052291f, -1.53792340282f, + }; + + const float expected_1[] = { + 0.0f, 0.0f, 0.0f, 0.0f, 0.4057888292f, 0.325309571755f, + 0.0f, 1.22013465602f, + }; + + const float expected_2[] = { + 0.156119444687f, + 0.517385299817f, + }; + + const float expected_3[] = { + 0.224177852984f, + 0.503384419034f, + 0.156119444687f, + 0.517385299817f, + }; + + const float *expected[] = { expected_0, expected_1, expected_2, expected_3 }; + + CNN_CONFIG cnn_config = { + 4, // num_layers + 0, // is_residue + 0, // ext_width + 0, // ext_height + 0, // strict_bounds + { + // layer_config + { + image_ch, // 
in_channels + filter_dim, // filter_width + filter_dim, // filter_height + num_filters, // out_channels + stride, // skip_width + stride, // skip_height + 0, // max_pool + weights_1, // weights + bias, // bias + PADDING_SAME_ZERO, // pad + NONE, // activation + 0, // deconvolve + 0, // branch + BRANCH_OUTPUT, // branch_copy_type + BRANCH_NOC, // branch_combine_type + { 2, 0, 0 }, // branch_config + {}, // bn_params + 0, // output_num + }, + { + num_filters, // in_channels + filter_dim, // filter_width + filter_dim, // filter_height + num_filters, // out_channels + stride, // skip_width + stride, // skip_height + 0, // max_pool + weights_2, // weights + bias, // bias + PADDING_SAME_ZERO, // pad + RELU, // activation + 0, // deconvolve + 0, // branch + BRANCH_NO_COPY, // branch_copy_type + BRANCH_NOC, // branch_combine_type + {}, // branch_config + {}, // bn_params + 1, // output_num + }, + { + num_filters, // in_channels + filter_dim, // filter_width + filter_dim, // filter_height + num_filters, // out_channels + stride, // skip_width + stride, // skip_height + 0, // max_pool + weights_3, // weights + bias, // bias + PADDING_SAME_ZERO, // pad + RELU, // activation + 0, // deconvolve + 0, // branch + BRANCH_NO_COPY, // branch_copy_type + BRANCH_NOC, // branch_combine_type + {}, // branch_config + {}, // bn_params + 2, // output_num + }, + { + num_filters, // in_channels + 2 * filter_dim, // filter_width + 2 * filter_dim, // filter_height + num_filters, // out_channels + 2 * stride, // skip_width + 2 * stride, // skip_height + 0, // max_pool + weights_4, // weights + bias, // bias + PADDING_VALID, // pad + RELU, // activation + 0, // deconvolve + 1, // branch + BRANCH_NO_COPY, // branch_copy_type + BRANCH_CAT, // branch_combine_type + { 0, 0, 1 }, // branch_config + {}, // bn_params + 3, // output_num + }, + }, + }; + + CNN_THREAD_DATA thread_data = { 1, nullptr }; + + const int num_outputs = 4; + const int output_chs[4] = { filter_dim, filter_dim, filter_dim, + 2 * 
filter_dim }; + const int output_dims[4] = { 4, 2, 1, 1 }; + const int output_sizes[4] = { + output_chs[0] * output_dims[0] * output_dims[0], + output_chs[1] * output_dims[1] * output_dims[1], + output_chs[2] * output_dims[2] * output_dims[2], + output_chs[3] * output_dims[3] * output_dims[3], + }; + float *const output_ = (float *)aom_malloc( + sizeof(*output_) * + (output_sizes[0] + output_sizes[1] + output_sizes[2] + output_sizes[3])); + ASSERT_NE(output_, nullptr); + float *output[CNN_MAX_CHANNELS] = { nullptr }; + int ch_ite = 0; + float *output_ite = output_; + for (int output_idx = 0; output_idx < num_outputs; output_idx++) { + for (int channel = 0; channel < output_chs[output_idx]; ++channel) { + output[ch_ite++] = output_ite; + output_ite += output_dims[output_idx] * output_dims[output_idx]; + } + } + CNN_MULTI_OUT output_struct = { num_outputs, output_chs, output_dims, + output }; + + RunMultiOutCNNTest(input, image_dim, image_dim, image_dim, &cnn_config, + &thread_data, &output_struct, expected, MSE_FLOAT_TOL); + + aom_free(output_); +} + +namespace { + +typedef void (*CNNConvolveNoMaxpoolPaddingValidFunc)( + const float **input, int in_width, int in_height, int in_stride, + const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride, + int start_idx, int cstep, int channel_step); + +typedef libaom_test::FuncParam<CNNConvolveNoMaxpoolPaddingValidFunc> + CNNConvolveTestFuncs; + +class CNNConvolveTest : public ::testing::TestWithParam<CNNConvolveTestFuncs> { + protected: + void SetUp() override { params_ = GetParam(); } + + void RunCNNConvolveSetup(int run_times) { + int in_width = 65; + int in_height = 65; + + const CNN_CONFIG *cnn_config = &av1_intra_mode_cnn_partition_cnn_config; + + for (int layer = 0; layer < cnn_config->num_layers; ++layer) { + int out_width = 0, out_height = 0; + int in_size = in_width * in_height; + // Get current layer output width and height. 
+ av1_find_cnn_layer_output_size(in_height, in_width, + &cnn_config->layer_config[layer], + &out_width, &out_height); + + int out_size = out_width * out_height; + float *input[20], *output_ref[20], *output_mod[20]; + + float *input_data = + (float *)aom_malloc(sizeof(*input_data) * in_size * + cnn_config->layer_config[layer].in_channels); + float *temp_ptr = input_data; + ASSERT_NE(temp_ptr, nullptr); + for (int i = 0; i < cnn_config->layer_config[layer].in_channels; ++i) { + input[i] = temp_ptr; + for (int j = 0; j < in_size; j++) { + *(temp_ptr++) = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31); + } + } + + float *out_data_ref = (float *)aom_calloc( + sizeof(*out_data_ref), + out_size * cnn_config->layer_config[layer].out_channels); + ASSERT_NE(out_data_ref, nullptr); + float *out_data_mod = (float *)aom_calloc( + sizeof(*out_data_mod), + out_size * cnn_config->layer_config[layer].out_channels); + ASSERT_NE(out_data_mod, nullptr); + float *temp_ptr1 = out_data_ref; + float *temp_ptr2 = out_data_mod; + for (int i = 0; i < cnn_config->layer_config[layer].out_channels; ++i) { + output_ref[i] = temp_ptr1; + output_mod[i] = temp_ptr2; + temp_ptr1 += out_size; + temp_ptr2 += out_size; + } + + RunCNNConvolveTest(input, in_width, in_height, out_size, + &cnn_config->layer_config[layer], 0, 1, run_times, + layer, output_ref, output_mod, out_width); + + // Set current layer output width and height as next layer input width and + // height. 
+ in_width = out_width; + in_height = out_height; + + aom_free(input_data); + aom_free(out_data_ref); + aom_free(out_data_mod); + } + } + + void RunCNNConvolveTest(float **input, int in_width, int in_height, + int out_size, const CNN_LAYER_CONFIG *layer_config, + int start_idx, int step, int run_times, int layer, + float **output_ref, float **output_mod, + int out_stride) { + const int cstep = layer_config->in_channels * layer_config->out_channels; + const int channel_step = AOMMAX(step, 1); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + params_.ref_func((const float **)input, in_width, in_height, in_width, + layer_config, output_ref, out_stride, start_idx, cstep, + channel_step); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + params_.tst_func((const float **)input, in_width, in_height, in_width, + layer_config, output_mod, out_stride, start_idx, cstep, + channel_step); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + if (run_times > 1) { + printf("layer : %d \n", layer); + printf("%7.2f/%7.2fns (%3.2f)\n", time1, time2, time1 / time2); + } else { + for (int channel = 0; channel < layer_config->out_channels; ++channel) { + const float *buf_ref = output_ref[channel]; + const float *buf_mod = output_mod[channel]; + + for (int i = 0; i < out_size; ++i) { + if (buf_ref[i] < CNN_CONVOLVE_PIXELWISE_FLOAT_TOL) { + ASSERT_LE(buf_ref[i], CNN_CONVOLVE_PIXELWISE_FLOAT_TOL) + << "Reference output was near-zero, test output was not (" + << buf_mod[i] << ")"; + } else { + const float error = buf_ref[i] - buf_mod[i]; + const float relative_error = fabsf(error / buf_ref[i]); + ASSERT_LE(relative_error, CNN_CONVOLVE_PIXELWISE_FLOAT_TOL) + << " channel " << channel << " pixel " << i << ": " + << buf_ref[i] << "/" << buf_mod[i] << 
std::endl; + } + } + } + } + } + + private: + CNNConvolveTestFuncs params_; + libaom_test::ACMRandom rng_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CNNConvolveTest); + +TEST_P(CNNConvolveTest, CheckOutput) { RunCNNConvolveSetup(1); } + +TEST_P(CNNConvolveTest, DISABLED_Speed) { RunCNNConvolveSetup(100000); } + +#if HAVE_AVX2 && !CONFIG_EXCLUDE_SIMD_MISMATCH +INSTANTIATE_TEST_SUITE_P(AVX2, CNNConvolveTest, + ::testing::Values(CNNConvolveTestFuncs( + &av1_cnn_convolve_no_maxpool_padding_valid_c, + &av1_cnn_convolve_no_maxpool_padding_valid_avx2))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, CNNConvolveTest, + ::testing::Values(CNNConvolveTestFuncs( + &av1_cnn_convolve_no_maxpool_padding_valid_c, + &av1_cnn_convolve_no_maxpool_padding_valid_neon))); +#endif + +} // namespace diff --git a/third_party/aom/test/codec_factory.h b/third_party/aom/test/codec_factory.h new file mode 100644 index 0000000000..7ffc465a7b --- /dev/null +++ b/third_party/aom/test/codec_factory.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ +#ifndef AOM_TEST_CODEC_FACTORY_H_ +#define AOM_TEST_CODEC_FACTORY_H_ + +#include <tuple> + +#include "config/aom_config.h" + +#include "aom/aom_decoder.h" +#include "aom/aom_encoder.h" +#if CONFIG_AV1_ENCODER +#include "aom/aomcx.h" +#endif +#if CONFIG_AV1_DECODER +#include "aom/aomdx.h" +#endif + +#include "test/decode_test_driver.h" +#include "test/encode_test_driver.h" +namespace libaom_test { + +const int kCodecFactoryParam = 0; + +class CodecFactory { + public: + CodecFactory() = default; + + virtual ~CodecFactory() = default; + + virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg) const = 0; + + virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg, + const aom_codec_flags_t flags) const = 0; + + virtual Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg, + const aom_codec_flags_t init_flags, + TwopassStatsStore *stats) const = 0; + + virtual aom_codec_err_t DefaultEncoderConfig(aom_codec_enc_cfg_t *cfg, + unsigned int usage) const = 0; +}; + +/* Provide CodecTestWith<n>Params classes for a variable number of parameters + * to avoid having to include a pointer to the CodecFactory in every test + * definition. 
+ */ +template <class T1> +class CodecTestWithParam + : public ::testing::TestWithParam< + std::tuple<const libaom_test::CodecFactory *, T1> > {}; + +template <class T1, class T2> +class CodecTestWith2Params + : public ::testing::TestWithParam< + std::tuple<const libaom_test::CodecFactory *, T1, T2> > {}; + +template <class T1, class T2, class T3> +class CodecTestWith3Params + : public ::testing::TestWithParam< + std::tuple<const libaom_test::CodecFactory *, T1, T2, T3> > {}; + +template <class T1, class T2, class T3, class T4> +class CodecTestWith4Params + : public ::testing::TestWithParam< + std::tuple<const libaom_test::CodecFactory *, T1, T2, T3, T4> > {}; + +template <class T1, class T2, class T3, class T4, class T5> +class CodecTestWith5Params + : public ::testing::TestWithParam< + std::tuple<const libaom_test::CodecFactory *, T1, T2, T3, T4, T5> > { +}; + +template <class T1, class T2, class T3, class T4, class T5, class T6> +class CodecTestWith6Params + : public ::testing::TestWithParam<std::tuple< + const libaom_test::CodecFactory *, T1, T2, T3, T4, T5, T6> > {}; + +/* + * AV1 Codec Definitions + */ +class AV1Decoder : public Decoder { + public: + explicit AV1Decoder(aom_codec_dec_cfg_t cfg) : Decoder(cfg) {} + + AV1Decoder(aom_codec_dec_cfg_t cfg, const aom_codec_flags_t flag) + : Decoder(cfg, flag) {} + + protected: + aom_codec_iface_t *CodecInterface() const override { +#if CONFIG_AV1_DECODER + return aom_codec_av1_dx(); +#else + return nullptr; +#endif + } +}; + +class AV1Encoder : public Encoder { + public: + AV1Encoder(aom_codec_enc_cfg_t cfg, const aom_codec_flags_t init_flags, + TwopassStatsStore *stats) + : Encoder(cfg, init_flags, stats) {} + + protected: + aom_codec_iface_t *CodecInterface() const override { +#if CONFIG_AV1_ENCODER + return aom_codec_av1_cx(); +#else + return nullptr; +#endif + } +}; + +class AV1CodecFactory : public CodecFactory { + public: + AV1CodecFactory() : CodecFactory() {} + + Decoder *CreateDecoder(aom_codec_dec_cfg_t 
cfg) const override { + return CreateDecoder(cfg, 0); + } + + Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg, + const aom_codec_flags_t flags) const override { +#if CONFIG_AV1_DECODER + return new AV1Decoder(cfg, flags); +#else + (void)cfg; + (void)flags; + return nullptr; +#endif + } + + Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg, + const aom_codec_flags_t init_flags, + TwopassStatsStore *stats) const override { +#if CONFIG_AV1_ENCODER + return new AV1Encoder(cfg, init_flags, stats); +#else + (void)cfg; + (void)init_flags; + (void)stats; + return nullptr; +#endif + } + + aom_codec_err_t DefaultEncoderConfig(aom_codec_enc_cfg_t *cfg, + unsigned int usage) const override { +#if CONFIG_AV1_ENCODER + return aom_codec_enc_config_default(aom_codec_av1_cx(), cfg, usage); +#else + (void)cfg; + (void)usage; + return AOM_CODEC_INCAPABLE; +#endif + } +}; + +const libaom_test::AV1CodecFactory kAV1; + +#define AV1_INSTANTIATE_TEST_SUITE(test, ...) \ + INSTANTIATE_TEST_SUITE_P( \ + AV1, test, \ + ::testing::Combine( \ + ::testing::Values(static_cast<const libaom_test::CodecFactory *>( \ + &libaom_test::kAV1)), \ + __VA_ARGS__)) + +} // namespace libaom_test +#endif // AOM_TEST_CODEC_FACTORY_H_ diff --git a/third_party/aom/test/coding_path_sync.cc b/third_party/aom/test/coding_path_sync.cc new file mode 100644 index 0000000000..f7b7eace90 --- /dev/null +++ b/third_party/aom/test/coding_path_sync.cc @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <vector> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" + +#include "config/aom_config.h" + +#include "aom/aomcx.h" +#include "aom/aomdx.h" +#include "aom/aom_encoder.h" +#include "aom/aom_decoder.h" + +#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0])) + +using libaom_test::ACMRandom; +namespace { + +class CompressedSource { + public: + explicit CompressedSource(int seed) : rnd_(seed), frame_count_(0) { + aom_codec_iface_t *algo = aom_codec_av1_cx(); + + aom_codec_enc_cfg_t cfg; +#if CONFIG_REALTIME_ONLY + aom_codec_enc_config_default(algo, &cfg, 1); +#else + aom_codec_enc_config_default(algo, &cfg, 0); +#endif + + // force the quantizer, to reduce the sensitivity on encoding choices. + // e.g, we don't want this test to break when the rate control is modified. + { + const int max_q = cfg.rc_max_quantizer; + const int min_q = cfg.rc_min_quantizer; + const int q = rnd_.PseudoUniform(max_q - min_q + 1) + min_q; + + cfg.rc_end_usage = AOM_Q; + cfg.rc_max_quantizer = q; + cfg.rc_min_quantizer = q; + } + + // choose the picture size + { + width_ = rnd_.PseudoUniform(kWidth - 8) + 8; + height_ = rnd_.PseudoUniform(kHeight - 8) + 8; + } + + // choose the chroma subsampling + { + const aom_img_fmt_t fmts[] = { + AOM_IMG_FMT_I420, + AOM_IMG_FMT_I422, + AOM_IMG_FMT_I444, + }; + + format_ = fmts[rnd_.PseudoUniform(NELEMENTS(fmts))]; + } + + cfg.g_w = width_; + cfg.g_h = height_; + cfg.g_lag_in_frames = 0; + if (format_ == AOM_IMG_FMT_I420) + cfg.g_profile = 0; + else if (format_ == AOM_IMG_FMT_I444) + cfg.g_profile = 1; + else if (format_ == AOM_IMG_FMT_I422) + cfg.g_profile = 2; + + aom_codec_enc_init(&enc_, algo, &cfg, 0); + } + + ~CompressedSource() { aom_codec_destroy(&enc_); } + + const aom_codec_cx_pkt_t *ReadFrame() { + uint8_t buf[kWidth * kHeight * 3] = { 0 }; + + // render regular pattern + const int period = rnd_.Rand8() % 32 + 1; + const int phase = rnd_.Rand8() % period; + + 
const int val_a = rnd_.Rand8(); + const int val_b = rnd_.Rand8(); + + for (int i = 0; i < (int)sizeof buf; ++i) + buf[i] = (i + phase) % period < period / 2 ? val_a : val_b; + + aom_image_t img; + aom_img_wrap(&img, format_, width_, height_, 0, buf); + aom_codec_encode(&enc_, &img, frame_count_++, 1, 0); + + aom_codec_iter_t iter = nullptr; + + const aom_codec_cx_pkt_t *pkt = nullptr; + + do { + pkt = aom_codec_get_cx_data(&enc_, &iter); + } while (pkt && pkt->kind != AOM_CODEC_CX_FRAME_PKT); + + return pkt; + } + + private: + static const int kWidth = 128; + static const int kHeight = 128; + + ACMRandom rnd_; + aom_img_fmt_t format_; + aom_codec_ctx_t enc_; + int frame_count_; + int width_, height_; +}; + +// lowers an aom_image_t to an easily comparable/printable form +std::vector<uint16_t> Serialize(const aom_image_t *img) { + std::vector<uint16_t> bytes; + bytes.reserve(img->d_w * img->d_h * 3); + for (int plane = 0; plane < 3; ++plane) { + const int w = aom_img_plane_width(img, plane); + const int h = aom_img_plane_height(img, plane); + + for (int r = 0; r < h; ++r) { + for (int c = 0; c < w; ++c) { + const unsigned char *row = img->planes[plane] + r * img->stride[plane]; + if (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + const uint16_t *row16 = reinterpret_cast<const uint16_t *>(row); + bytes.push_back(row16[c]); + } else { + bytes.push_back(row[c]); + } + } + } + } + + return bytes; +} + +class Decoder { + public: + explicit Decoder(int allowLowbitdepth) { + aom_codec_iface_t *algo = aom_codec_av1_dx(); + + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.allow_lowbitdepth = allowLowbitdepth; + + aom_codec_dec_init(&dec_, algo, &cfg, 0); + } + + ~Decoder() { aom_codec_destroy(&dec_); } + + std::vector<uint16_t> decode(const aom_codec_cx_pkt_t *pkt) { + aom_codec_decode(&dec_, static_cast<uint8_t *>(pkt->data.frame.buf), + pkt->data.frame.sz, nullptr); + + aom_codec_iter_t iter = nullptr; + return Serialize(aom_codec_get_frame(&dec_, &iter)); + } + + 
private: + aom_codec_ctx_t dec_; +}; + +// Try to reveal a mismatch between LBD and HBD coding paths. +TEST(CodingPathSync, SearchForHbdLbdMismatch) { + const int count_tests = 10; + for (int i = 0; i < count_tests; ++i) { + Decoder dec_hbd(0); + Decoder dec_lbd(1); + + CompressedSource enc(i); + + for (int k = 0; k < 3; ++k) { + const aom_codec_cx_pkt_t *frame = enc.ReadFrame(); + + std::vector<uint16_t> lbd_yuv = dec_lbd.decode(frame); + std::vector<uint16_t> hbd_yuv = dec_hbd.decode(frame); + + ASSERT_EQ(lbd_yuv, hbd_yuv); + } + } +} + +TEST(CodingPathSyncLarge, SearchForHbdLbdMismatchLarge) { + const int count_tests = 100; + const int seed = 1234; + for (int i = 0; i < count_tests; ++i) { + Decoder dec_hbd(0); + Decoder dec_lbd(1); + + CompressedSource enc(seed + i); + + for (int k = 0; k < 5; ++k) { + const aom_codec_cx_pkt_t *frame = enc.ReadFrame(); + + std::vector<uint16_t> lbd_yuv = dec_lbd.decode(frame); + std::vector<uint16_t> hbd_yuv = dec_hbd.decode(frame); + + ASSERT_EQ(lbd_yuv, hbd_yuv); + } + } +} + +} // namespace diff --git a/third_party/aom/test/comp_avg_pred_test.cc b/third_party/aom/test/comp_avg_pred_test.cc new file mode 100644 index 0000000000..2f81d7e9b7 --- /dev/null +++ b/third_party/aom/test/comp_avg_pred_test.cc @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "test/comp_avg_pred_test.h" + +using libaom_test::ACMRandom; +using libaom_test::AV1DISTWTDCOMPAVG::AV1DISTWTDCOMPAVGTest; +using libaom_test::AV1DISTWTDCOMPAVG::DistWtdCompAvgParam; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1DISTWTDCOMPAVGTest); +using libaom_test::AV1DISTWTDCOMPAVG::AV1DISTWTDCOMPAVGUPSAMPLEDTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1DISTWTDCOMPAVGUPSAMPLEDTest); +using libaom_test::AV1DISTWTDCOMPAVG::DistWtdCompAvgTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DistWtdCompAvgTest); +#if CONFIG_AV1_HIGHBITDEPTH +using libaom_test::AV1DISTWTDCOMPAVG::AV1HighBDDISTWTDCOMPAVGTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighBDDISTWTDCOMPAVGTest); +using libaom_test::AV1DISTWTDCOMPAVG::AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST( + AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest); +#endif +using std::make_tuple; +using std::tuple; + +uint8_t *DistWtdCompAvgTest::reference_data_ = nullptr; +uint8_t *DistWtdCompAvgTest::second_pred_ = nullptr; +uint8_t *DistWtdCompAvgTest::comp_pred_ = nullptr; +uint8_t *DistWtdCompAvgTest::comp_pred_test_ = nullptr; +uint8_t *DistWtdCompAvgTest::reference_data8_ = nullptr; +uint8_t *DistWtdCompAvgTest::second_pred8_ = nullptr; +uint8_t *DistWtdCompAvgTest::comp_pred8_ = nullptr; +uint8_t *DistWtdCompAvgTest::comp_pred8_test_ = nullptr; +uint16_t *DistWtdCompAvgTest::reference_data16_ = nullptr; +uint16_t *DistWtdCompAvgTest::second_pred16_ = nullptr; +uint16_t *DistWtdCompAvgTest::comp_pred16_ = nullptr; +uint16_t *DistWtdCompAvgTest::comp_pred16_test_ = nullptr; + +namespace { + +TEST_P(AV1DISTWTDCOMPAVGTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); } + +TEST_P(AV1DISTWTDCOMPAVGTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); } + +#if HAVE_SSSE3 +INSTANTIATE_TEST_SUITE_P(SSSE3, AV1DISTWTDCOMPAVGTest, + libaom_test::AV1DISTWTDCOMPAVG::BuildParams( + aom_dist_wtd_comp_avg_pred_ssse3)); +#endif + 
+TEST_P(AV1DISTWTDCOMPAVGUPSAMPLEDTest, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(0)); +} + +TEST_P(AV1DISTWTDCOMPAVGUPSAMPLEDTest, CheckOutput) { + RunCheckOutput(GET_PARAM(0)); +} + +#if HAVE_SSSE3 +INSTANTIATE_TEST_SUITE_P(SSSE3, AV1DISTWTDCOMPAVGUPSAMPLEDTest, + libaom_test::AV1DISTWTDCOMPAVG::BuildParams( + aom_dist_wtd_comp_avg_upsampled_pred_ssse3)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1DISTWTDCOMPAVGUPSAMPLEDTest, + libaom_test::AV1DISTWTDCOMPAVG::BuildParams( + aom_dist_wtd_comp_avg_upsampled_pred_neon)); +#endif // HAVE_NEON + +TEST_P(DistWtdCompAvgTest, MaxRef) { + FillConstant(reference_data_, reference_stride_, mask_); + FillConstant(second_pred_, width_, 0); + CheckCompAvg(); +} + +TEST_P(DistWtdCompAvgTest, MaxSecondPred) { + FillConstant(reference_data_, reference_stride_, 0); + FillConstant(second_pred_, width_, mask_); + CheckCompAvg(); +} + +TEST_P(DistWtdCompAvgTest, ShortRef) { + const int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(reference_data_, reference_stride_); + FillRandom(second_pred_, width_); + CheckCompAvg(); + reference_stride_ = tmp_stride; +} + +TEST_P(DistWtdCompAvgTest, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. 
+ const int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(reference_data_, reference_stride_); + FillRandom(second_pred_, width_); + CheckCompAvg(); + reference_stride_ = tmp_stride; +} + +// TODO(chengchen): add highbd tests +const DistWtdCompAvgParam dist_wtd_comp_avg_c_tests[] = { + make_tuple(128, 128, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(128, 64, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(64, 128, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(64, 64, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(64, 32, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(32, 64, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(32, 32, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(32, 16, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(16, 32, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(16, 8, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(8, 16, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(8, 8, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(8, 4, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(4, 8, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(4, 4, &aom_dist_wtd_comp_avg_pred_c, -1), + +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(16, 64, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(32, 8, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(8, 32, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(16, 4, &aom_dist_wtd_comp_avg_pred_c, -1), + make_tuple(4, 16, &aom_dist_wtd_comp_avg_pred_c, -1), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(C, DistWtdCompAvgTest, + ::testing::ValuesIn(dist_wtd_comp_avg_c_tests)); + +#if HAVE_SSSE3 +const DistWtdCompAvgParam dist_wtd_comp_avg_ssse3_tests[] = { + make_tuple(128, 128, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(128, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(64, 128, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(64, 64, 
&aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(64, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(32, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(32, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(32, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(16, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(16, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(8, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(8, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(8, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(4, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(4, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(16, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(32, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(8, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(16, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1), + make_tuple(4, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(SSSE3, DistWtdCompAvgTest, + ::testing::ValuesIn(dist_wtd_comp_avg_ssse3_tests)); +#endif // HAVE_SSSE3 + +#if HAVE_NEON +const DistWtdCompAvgParam dist_wtd_comp_avg_neon_tests[] = { + make_tuple(128, 128, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(128, 64, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(64, 128, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(64, 64, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(64, 32, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(32, 64, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(32, 32, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(32, 16, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(16, 32, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(16, 16, 
&aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(16, 8, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(8, 16, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(8, 8, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(8, 4, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(4, 8, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(4, 4, &aom_dist_wtd_comp_avg_pred_neon, -1), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(16, 64, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(32, 8, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(8, 32, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(16, 4, &aom_dist_wtd_comp_avg_pred_neon, -1), + make_tuple(4, 16, &aom_dist_wtd_comp_avg_pred_neon, -1), +#endif // !CONFIG_REALTIME_ONLY +}; + +INSTANTIATE_TEST_SUITE_P(NEON, DistWtdCompAvgTest, + ::testing::ValuesIn(dist_wtd_comp_avg_neon_tests)); +#endif // HAVE_NEON + +#if CONFIG_AV1_HIGHBITDEPTH +TEST_P(AV1HighBDDISTWTDCOMPAVGTest, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(1)); +} + +TEST_P(AV1HighBDDISTWTDCOMPAVGTest, CheckOutput) { + RunCheckOutput(GET_PARAM(1)); +} + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, AV1HighBDDISTWTDCOMPAVGTest, + libaom_test::AV1DISTWTDCOMPAVG::BuildParams( + aom_highbd_dist_wtd_comp_avg_pred_sse2, 1)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1HighBDDISTWTDCOMPAVGTest, + libaom_test::AV1DISTWTDCOMPAVG::BuildParams( + aom_highbd_dist_wtd_comp_avg_pred_neon, 1)); +#endif + +TEST_P(AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(1)); +} + +TEST_P(AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest, CheckOutput) { + RunCheckOutput(GET_PARAM(1)); +} + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest, + libaom_test::AV1DISTWTDCOMPAVG::BuildParams( + aom_highbd_dist_wtd_comp_avg_upsampled_pred_sse2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest, + 
libaom_test::AV1DISTWTDCOMPAVG::BuildParams( + aom_highbd_dist_wtd_comp_avg_upsampled_pred_neon)); +#endif + +#endif // CONFIG_AV1_HIGHBITDEPTH + +} // namespace diff --git a/third_party/aom/test/comp_avg_pred_test.h b/third_party/aom/test/comp_avg_pred_test.h new file mode 100644 index 0000000000..396df2e2dd --- /dev/null +++ b/third_party/aom/test/comp_avg_pred_test.h @@ -0,0 +1,757 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AOM_TEST_COMP_AVG_PRED_TEST_H_ +#define AOM_TEST_COMP_AVG_PRED_TEST_H_ + +#include <tuple> + +#include "config/aom_dsp_rtcd.h" +#include "config/av1_rtcd.h" + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/util.h" +#include "test/register_state_check.h" +#include "av1/common/common_data.h" +#include "aom_ports/aom_timer.h" + +namespace libaom_test { +const int kMaxSize = 128 + 32; // padding + +namespace AV1DISTWTDCOMPAVG { + +typedef void (*distwtdcompavg_func)(uint8_t *comp_pred, const uint8_t *pred, + int width, int height, const uint8_t *ref, + int ref_stride, + const DIST_WTD_COMP_PARAMS *jcp_param); + +typedef void (*distwtdcompavgupsampled_func)( + MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, + const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width, + int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref, + int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search); + +typedef void 
(*DistWtdCompAvgFunc)(uint8_t *comp_pred, const uint8_t *pred, + int width, int height, const uint8_t *ref, + int ref_stride, + const DIST_WTD_COMP_PARAMS *jcp_param); + +typedef std::tuple<distwtdcompavg_func, BLOCK_SIZE> DISTWTDCOMPAVGParam; + +typedef std::tuple<distwtdcompavgupsampled_func, BLOCK_SIZE> + DISTWTDCOMPAVGUPSAMPLEDParam; + +typedef std::tuple<int, int, DistWtdCompAvgFunc, int> DistWtdCompAvgParam; + +#if CONFIG_AV1_HIGHBITDEPTH +typedef void (*highbddistwtdcompavgupsampled_func)( + MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, + const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width, + int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, + int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param, + int subpel_search); + +typedef std::tuple<int, highbddistwtdcompavgupsampled_func, BLOCK_SIZE> + HighbdDISTWTDCOMPAVGUPSAMPLEDParam; + +typedef std::tuple<int, distwtdcompavg_func, BLOCK_SIZE> + HighbdDISTWTDCOMPAVGParam; + +::testing::internal::ParamGenerator<HighbdDISTWTDCOMPAVGParam> BuildParams( + distwtdcompavg_func filter, int is_hbd) { + (void)is_hbd; + return ::testing::Combine(::testing::Range(8, 13, 2), + ::testing::Values(filter), + ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL)); +} + +::testing::internal::ParamGenerator<HighbdDISTWTDCOMPAVGUPSAMPLEDParam> +BuildParams(highbddistwtdcompavgupsampled_func filter) { + return ::testing::Combine(::testing::Range(8, 13, 2), + ::testing::Values(filter), + ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL)); +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +::testing::internal::ParamGenerator<DISTWTDCOMPAVGParam> BuildParams( + distwtdcompavg_func filter) { + return ::testing::Combine(::testing::Values(filter), + ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL)); +} + +::testing::internal::ParamGenerator<DISTWTDCOMPAVGUPSAMPLEDParam> BuildParams( + distwtdcompavgupsampled_func filter) { + return ::testing::Combine(::testing::Values(filter), + 
::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL)); +} + +class AV1DISTWTDCOMPAVGTest + : public ::testing::TestWithParam<DISTWTDCOMPAVGParam> { + public: + ~AV1DISTWTDCOMPAVGTest() override = default; + void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); } + + protected: + void RunCheckOutput(distwtdcompavg_func test_impl) { + const int w = kMaxSize, h = kMaxSize; + const int block_idx = GET_PARAM(1); + + uint8_t pred8[kMaxSize * kMaxSize]; + uint8_t ref8[kMaxSize * kMaxSize]; + uint8_t output[kMaxSize * kMaxSize]; + uint8_t output2[kMaxSize * kMaxSize]; + + for (int i = 0; i < h; ++i) + for (int j = 0; j < w; ++j) { + pred8[i * w + j] = rnd_.Rand8(); + ref8[i * w + j] = rnd_.Rand8(); + } + const int in_w = block_size_wide[block_idx]; + const int in_h = block_size_high[block_idx]; + + DIST_WTD_COMP_PARAMS dist_wtd_comp_params; + dist_wtd_comp_params.use_dist_wtd_comp_avg = 1; + + for (int ii = 0; ii < 2; ii++) { + for (int jj = 0; jj < 4; jj++) { + dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii]; + dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[jj][1 - ii]; + + const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7); + const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7); + aom_dist_wtd_comp_avg_pred_c(output, pred8 + offset_r * w + offset_c, + in_w, in_h, ref8 + offset_r * w + offset_c, + in_w, &dist_wtd_comp_params); + test_impl(output2, pred8 + offset_r * w + offset_c, in_w, in_h, + ref8 + offset_r * w + offset_c, in_w, &dist_wtd_comp_params); + + for (int i = 0; i < in_h; ++i) { + for (int j = 0; j < in_w; ++j) { + int idx = i * in_w + j; + ASSERT_EQ(output[idx], output2[idx]) + << "Mismatch at unit tests for AV1DISTWTDCOMPAVGTest\n" + << in_w << "x" << in_h << " Pixel mismatch at index " << idx + << " = (" << i << ", " << j << ")"; + } + } + } + } + } + void RunSpeedTest(distwtdcompavg_func test_impl) { + const int w = kMaxSize, h = kMaxSize; + const int block_idx = GET_PARAM(1); + + uint8_t pred8[kMaxSize * 
kMaxSize]; + uint8_t ref8[kMaxSize * kMaxSize]; + uint8_t output[kMaxSize * kMaxSize]; + uint8_t output2[kMaxSize * kMaxSize]; + + for (int i = 0; i < h; ++i) + for (int j = 0; j < w; ++j) { + pred8[i * w + j] = rnd_.Rand8(); + ref8[i * w + j] = rnd_.Rand8(); + } + const int in_w = block_size_wide[block_idx]; + const int in_h = block_size_high[block_idx]; + + DIST_WTD_COMP_PARAMS dist_wtd_comp_params; + dist_wtd_comp_params.use_dist_wtd_comp_avg = 1; + + dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0]; + dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1]; + + const int num_loops = 1000000000 / (in_w + in_h); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + + for (int i = 0; i < num_loops; ++i) + aom_dist_wtd_comp_avg_pred_c(output, pred8, in_w, in_h, ref8, in_w, + &dist_wtd_comp_params); + + aom_usec_timer_mark(&timer); + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + printf("distwtdcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h, + 1000.0 * elapsed_time / num_loops); + + aom_usec_timer timer1; + aom_usec_timer_start(&timer1); + + for (int i = 0; i < num_loops; ++i) + test_impl(output2, pred8, in_w, in_h, ref8, in_w, &dist_wtd_comp_params); + + aom_usec_timer_mark(&timer1); + const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); + printf("distwtdcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h, + 1000.0 * elapsed_time1 / num_loops); + } + + libaom_test::ACMRandom rnd_; +}; // class AV1DISTWTDCOMPAVGTest + +class AV1DISTWTDCOMPAVGUPSAMPLEDTest + : public ::testing::TestWithParam<DISTWTDCOMPAVGUPSAMPLEDParam> { + public: + ~AV1DISTWTDCOMPAVGUPSAMPLEDTest() override = default; + void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); } + + protected: + void RunCheckOutput(distwtdcompavgupsampled_func test_impl) { + const int w = kMaxSize, h = kMaxSize; + const int block_idx = GET_PARAM(1); + + uint8_t pred8[kMaxSize * kMaxSize]; + uint8_t ref8[kMaxSize * 
kMaxSize]; + DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]); + + for (int i = 0; i < h; ++i) + for (int j = 0; j < w; ++j) { + pred8[i * w + j] = rnd_.Rand8(); + ref8[i * w + j] = rnd_.Rand8(); + } + const int in_w = block_size_wide[block_idx]; + const int in_h = block_size_high[block_idx]; + + DIST_WTD_COMP_PARAMS dist_wtd_comp_params; + dist_wtd_comp_params.use_dist_wtd_comp_avg = 1; + int sub_x_q3, sub_y_q3; + int subpel_search; + for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS; + ++subpel_search) { + for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) { + for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) { + for (int ii = 0; ii < 2; ii++) { + for (int jj = 0; jj < 4; jj++) { + dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii]; + dist_wtd_comp_params.bck_offset = + quant_dist_lookup_table[jj][1 - ii]; + + const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7); + const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7); + + aom_dist_wtd_comp_avg_upsampled_pred_c( + nullptr, nullptr, 0, 0, nullptr, output, + pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3, + sub_y_q3, ref8 + offset_r * w + offset_c, in_w, + &dist_wtd_comp_params, subpel_search); + test_impl(nullptr, nullptr, 0, 0, nullptr, output2, + pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3, + sub_y_q3, ref8 + offset_r * w + offset_c, in_w, + &dist_wtd_comp_params, subpel_search); + + for (int i = 0; i < in_h; ++i) { + for (int j = 0; j < in_w; ++j) { + int idx = i * in_w + j; + ASSERT_EQ(output[idx], output2[idx]) + << "Mismatch at unit tests for " + "AV1DISTWTDCOMPAVGUPSAMPLEDTest\n" + << in_w << "x" << in_h << " Pixel mismatch at index " + << idx << " = (" << i << ", " << j + << "), sub pixel offset = (" << sub_y_q3 << ", " + << sub_x_q3 << ")"; + } + } + } + } + } + } + } + } + void RunSpeedTest(distwtdcompavgupsampled_func test_impl) { + const int w = kMaxSize, h = kMaxSize; + const int block_idx = 
GET_PARAM(1); + + uint8_t pred8[kMaxSize * kMaxSize]; + uint8_t ref8[kMaxSize * kMaxSize]; + DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]); + + for (int i = 0; i < h; ++i) + for (int j = 0; j < w; ++j) { + pred8[i * w + j] = rnd_.Rand8(); + ref8[i * w + j] = rnd_.Rand8(); + } + const int in_w = block_size_wide[block_idx]; + const int in_h = block_size_high[block_idx]; + + DIST_WTD_COMP_PARAMS dist_wtd_comp_params; + dist_wtd_comp_params.use_dist_wtd_comp_avg = 1; + + dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0]; + dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1]; + + int sub_x_q3 = 0; + int sub_y_q3 = 0; + + const int num_loops = 1000000000 / (in_w + in_h); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + int subpel_search = USE_8_TAPS; // set to USE_4_TAPS to test 4-tap filter. + + for (int i = 0; i < num_loops; ++i) + aom_dist_wtd_comp_avg_upsampled_pred_c( + nullptr, nullptr, 0, 0, nullptr, output, pred8, in_w, in_h, sub_x_q3, + sub_y_q3, ref8, in_w, &dist_wtd_comp_params, subpel_search); + + aom_usec_timer_mark(&timer); + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + printf("distwtdcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w, in_h, + 1000.0 * elapsed_time / num_loops); + + aom_usec_timer timer1; + aom_usec_timer_start(&timer1); + + for (int i = 0; i < num_loops; ++i) + test_impl(nullptr, nullptr, 0, 0, nullptr, output2, pred8, in_w, in_h, + sub_x_q3, sub_y_q3, ref8, in_w, &dist_wtd_comp_params, + subpel_search); + + aom_usec_timer_mark(&timer1); + const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); + printf("distwtdcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w, in_h, + 1000.0 * elapsed_time1 / num_loops); + } + + libaom_test::ACMRandom rnd_; +}; // class AV1DISTWTDCOMPAVGUPSAMPLEDTest + +class DistWtdCompAvgTest + : public ::testing::WithParamInterface<DistWtdCompAvgParam>, + public 
::testing::Test { + public: + DistWtdCompAvgTest() + : width_(GET_PARAM(0)), height_(GET_PARAM(1)), bd_(GET_PARAM(3)) {} + + static void SetUpTestSuite() { + reference_data8_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kDataBufferSize)); + ASSERT_NE(reference_data8_, nullptr); + second_pred8_ = + reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128)); + ASSERT_NE(second_pred8_, nullptr); + comp_pred8_ = + reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128)); + ASSERT_NE(comp_pred8_, nullptr); + comp_pred8_test_ = + reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128)); + ASSERT_NE(comp_pred8_test_, nullptr); + reference_data16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, kDataBufferSize * sizeof(uint16_t))); + ASSERT_NE(reference_data16_, nullptr); + second_pred16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t))); + ASSERT_NE(second_pred16_, nullptr); + comp_pred16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t))); + ASSERT_NE(comp_pred16_, nullptr); + comp_pred16_test_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t))); + ASSERT_NE(comp_pred16_test_, nullptr); + } + + static void TearDownTestSuite() { + aom_free(reference_data8_); + reference_data8_ = nullptr; + aom_free(second_pred8_); + second_pred8_ = nullptr; + aom_free(comp_pred8_); + comp_pred8_ = nullptr; + aom_free(comp_pred8_test_); + comp_pred8_test_ = nullptr; + aom_free(reference_data16_); + reference_data16_ = nullptr; + aom_free(second_pred16_); + second_pred16_ = nullptr; + aom_free(comp_pred16_); + comp_pred16_ = nullptr; + aom_free(comp_pred16_test_); + comp_pred16_test_ = nullptr; + } + + protected: + // Handle up to 4 128x128 blocks, with stride up to 256 + static const int kDataAlignment = 16; + static const int kDataBlockSize = 128 * 256; + static const int kDataBufferSize = 4 * 
kDataBlockSize; + + void SetUp() override { + if (bd_ == -1) { + use_high_bit_depth_ = false; + bit_depth_ = AOM_BITS_8; + reference_data_ = reference_data8_; + second_pred_ = second_pred8_; + comp_pred_ = comp_pred8_; + comp_pred_test_ = comp_pred8_test_; + } else { + use_high_bit_depth_ = true; + bit_depth_ = static_cast<aom_bit_depth_t>(bd_); + reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_); + second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_); + comp_pred_ = CONVERT_TO_BYTEPTR(comp_pred16_); + comp_pred_test_ = CONVERT_TO_BYTEPTR(comp_pred16_test_); + } + mask_ = (1 << bit_depth_) - 1; + reference_stride_ = width_ * 2; + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + virtual uint8_t *GetReference(int block_idx) { + if (use_high_bit_depth_) + return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) + + block_idx * kDataBlockSize); + return reference_data_ + block_idx * kDataBlockSize; + } + + void ReferenceDistWtdCompAvg(int block_idx) { + const uint8_t *const reference8 = GetReference(block_idx); + const uint8_t *const second_pred8 = second_pred_; + uint8_t *const comp_pred8 = comp_pred_; + const uint16_t *const reference16 = + CONVERT_TO_SHORTPTR(GetReference(block_idx)); + const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_); + uint16_t *const comp_pred16 = CONVERT_TO_SHORTPTR(comp_pred_); + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + const int tmp = + second_pred8[h * width_ + w] * jcp_param_.bck_offset + + reference8[h * reference_stride_ + w] * jcp_param_.fwd_offset; + comp_pred8[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4); + } else { + const int tmp = + second_pred16[h * width_ + w] * jcp_param_.bck_offset + + reference16[h * reference_stride_ + w] * jcp_param_.fwd_offset; + comp_pred16[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4); + } + } + } + } + + void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) { + uint8_t *data8 = data; + uint16_t 
*data16 = CONVERT_TO_SHORTPTR(data); + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + data8[h * stride + w] = static_cast<uint8_t>(fill_constant); + } else { + data16[h * stride + w] = fill_constant; + } + } + } + } + + void FillRandom(uint8_t *data, int stride) { + uint8_t *data8 = data; + uint16_t *data16 = CONVERT_TO_SHORTPTR(data); + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + data8[h * stride + w] = rnd_.Rand8(); + } else { + data16[h * stride + w] = rnd_.Rand16() & mask_; + } + } + } + } + + void dist_wtd_comp_avg(int block_idx) { + const uint8_t *const reference = GetReference(block_idx); + + API_REGISTER_STATE_CHECK(GET_PARAM(2)(comp_pred_test_, second_pred_, width_, + height_, reference, reference_stride_, + &jcp_param_)); + } + + void CheckCompAvg() { + for (int j = 0; j < 2; ++j) { + for (int i = 0; i < 4; ++i) { + jcp_param_.fwd_offset = quant_dist_lookup_table[i][j]; + jcp_param_.bck_offset = quant_dist_lookup_table[i][1 - j]; + + ReferenceDistWtdCompAvg(0); + dist_wtd_comp_avg(0); + + for (int y = 0; y < height_; ++y) + for (int x = 0; x < width_; ++x) + ASSERT_EQ(comp_pred_[y * width_ + x], + comp_pred_test_[y * width_ + x]); + } + } + } + + int width_, height_, mask_, bd_; + aom_bit_depth_t bit_depth_; + static uint8_t *reference_data_; + static uint8_t *second_pred_; + bool use_high_bit_depth_; + static uint8_t *reference_data8_; + static uint8_t *second_pred8_; + static uint16_t *reference_data16_; + static uint16_t *second_pred16_; + int reference_stride_; + static uint8_t *comp_pred_; + static uint8_t *comp_pred8_; + static uint16_t *comp_pred16_; + static uint8_t *comp_pred_test_; + static uint8_t *comp_pred8_test_; + static uint16_t *comp_pred16_test_; + DIST_WTD_COMP_PARAMS jcp_param_; + + ACMRandom rnd_; +}; + +#if CONFIG_AV1_HIGHBITDEPTH +class AV1HighBDDISTWTDCOMPAVGTest + : public 
::testing::TestWithParam<HighbdDISTWTDCOMPAVGParam> { + public: + ~AV1HighBDDISTWTDCOMPAVGTest() override = default; + void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); } + + protected: + void RunCheckOutput(distwtdcompavg_func test_impl) { + const int w = kMaxSize, h = kMaxSize; + const int block_idx = GET_PARAM(2); + const int bd = GET_PARAM(0); + uint16_t pred8[kMaxSize * kMaxSize]; + uint16_t ref8[kMaxSize * kMaxSize]; + uint16_t output[kMaxSize * kMaxSize]; + uint16_t output2[kMaxSize * kMaxSize]; + + for (int i = 0; i < h; ++i) + for (int j = 0; j < w; ++j) { + pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + } + const int in_w = block_size_wide[block_idx]; + const int in_h = block_size_high[block_idx]; + + DIST_WTD_COMP_PARAMS dist_wtd_comp_params; + dist_wtd_comp_params.use_dist_wtd_comp_avg = 1; + + for (int ii = 0; ii < 2; ii++) { + for (int jj = 0; jj < 4; jj++) { + dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii]; + dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[jj][1 - ii]; + + const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7); + const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7); + aom_highbd_dist_wtd_comp_avg_pred_c( + CONVERT_TO_BYTEPTR(output), + CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w, in_h, + CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w, + &dist_wtd_comp_params); + test_impl(CONVERT_TO_BYTEPTR(output2), + CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w, + in_h, CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, + in_w, &dist_wtd_comp_params); + + for (int i = 0; i < in_h; ++i) { + for (int j = 0; j < in_w; ++j) { + int idx = i * in_w + j; + ASSERT_EQ(output[idx], output2[idx]) + << "Mismatch at unit tests for AV1HighBDDISTWTDCOMPAVGTest\n" + << in_w << "x" << in_h << " Pixel mismatch at index " << idx + << " = (" << i << ", " << j << ")"; + } + } + } + } + } + void 
RunSpeedTest(distwtdcompavg_func test_impl) { + const int w = kMaxSize, h = kMaxSize; + const int block_idx = GET_PARAM(2); + const int bd = GET_PARAM(0); + uint16_t pred8[kMaxSize * kMaxSize]; + uint16_t ref8[kMaxSize * kMaxSize]; + uint16_t output[kMaxSize * kMaxSize]; + uint16_t output2[kMaxSize * kMaxSize]; + + for (int i = 0; i < h; ++i) + for (int j = 0; j < w; ++j) { + pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + } + const int in_w = block_size_wide[block_idx]; + const int in_h = block_size_high[block_idx]; + + DIST_WTD_COMP_PARAMS dist_wtd_comp_params; + dist_wtd_comp_params.use_dist_wtd_comp_avg = 1; + + dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0]; + dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1]; + + const int num_loops = 1000000000 / (in_w + in_h); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + + for (int i = 0; i < num_loops; ++i) + aom_highbd_dist_wtd_comp_avg_pred_c( + CONVERT_TO_BYTEPTR(output), CONVERT_TO_BYTEPTR(pred8), in_w, in_h, + CONVERT_TO_BYTEPTR(ref8), in_w, &dist_wtd_comp_params); + + aom_usec_timer_mark(&timer); + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + printf("highbddistwtdcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h, + 1000.0 * elapsed_time / num_loops); + + aom_usec_timer timer1; + aom_usec_timer_start(&timer1); + + for (int i = 0; i < num_loops; ++i) + test_impl(CONVERT_TO_BYTEPTR(output2), CONVERT_TO_BYTEPTR(pred8), in_w, + in_h, CONVERT_TO_BYTEPTR(ref8), in_w, &dist_wtd_comp_params); + + aom_usec_timer_mark(&timer1); + const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); + printf("highbddistwtdcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h, + 1000.0 * elapsed_time1 / num_loops); + } + + libaom_test::ACMRandom rnd_; +}; // class AV1HighBDDISTWTDCOMPAVGTest + +class AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest + : public 
::testing::TestWithParam<HighbdDISTWTDCOMPAVGUPSAMPLEDParam> { + public: + ~AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest() override = default; + void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); } + + protected: + void RunCheckOutput(highbddistwtdcompavgupsampled_func test_impl) { + const int w = kMaxSize, h = kMaxSize; + const int block_idx = GET_PARAM(2); + const int bd = GET_PARAM(0); + uint16_t pred8[kMaxSize * kMaxSize]; + uint16_t ref8[kMaxSize * kMaxSize]; + DECLARE_ALIGNED(16, uint16_t, output[kMaxSize * kMaxSize]); + DECLARE_ALIGNED(16, uint16_t, output2[kMaxSize * kMaxSize]); + + for (int i = 0; i < h; ++i) + for (int j = 0; j < w; ++j) { + pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + } + const int in_w = block_size_wide[block_idx]; + const int in_h = block_size_high[block_idx]; + + DIST_WTD_COMP_PARAMS dist_wtd_comp_params; + dist_wtd_comp_params.use_dist_wtd_comp_avg = 1; + int sub_x_q3, sub_y_q3; + int subpel_search; + for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS; + ++subpel_search) { + for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) { + for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) { + for (int ii = 0; ii < 2; ii++) { + for (int jj = 0; jj < 4; jj++) { + dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii]; + dist_wtd_comp_params.bck_offset = + quant_dist_lookup_table[jj][1 - ii]; + + const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7); + const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7); + + aom_highbd_dist_wtd_comp_avg_upsampled_pred_c( + nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output), + CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w, + in_h, sub_x_q3, sub_y_q3, + CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w, bd, + &dist_wtd_comp_params, subpel_search); + test_impl(nullptr, nullptr, 0, 0, nullptr, + CONVERT_TO_BYTEPTR(output2), + CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, + in_w, in_h, sub_x_q3, 
sub_y_q3, + CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, + in_w, bd, &dist_wtd_comp_params, subpel_search); + + for (int i = 0; i < in_h; ++i) { + for (int j = 0; j < in_w; ++j) { + int idx = i * in_w + j; + ASSERT_EQ(output[idx], output2[idx]) + << "Mismatch at unit tests for " + "AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest\n" + << in_w << "x" << in_h << " Pixel mismatch at index " + << idx << " = (" << i << ", " << j + << "), sub pixel offset = (" << sub_y_q3 << ", " + << sub_x_q3 << ")"; + } + } + } + } + } + } + } + } + void RunSpeedTest(highbddistwtdcompavgupsampled_func test_impl) { + const int w = kMaxSize, h = kMaxSize; + const int block_idx = GET_PARAM(2); + const int bd = GET_PARAM(0); + uint16_t pred8[kMaxSize * kMaxSize]; + uint16_t ref8[kMaxSize * kMaxSize]; + DECLARE_ALIGNED(16, uint16_t, output[kMaxSize * kMaxSize]); + DECLARE_ALIGNED(16, uint16_t, output2[kMaxSize * kMaxSize]); + + for (int i = 0; i < h; ++i) + for (int j = 0; j < w; ++j) { + pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + } + const int in_w = block_size_wide[block_idx]; + const int in_h = block_size_high[block_idx]; + + DIST_WTD_COMP_PARAMS dist_wtd_comp_params; + dist_wtd_comp_params.use_dist_wtd_comp_avg = 1; + + dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0]; + dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1]; + int sub_x_q3 = 0; + int sub_y_q3 = 0; + const int num_loops = 1000000000 / (in_w + in_h); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + int subpel_search = USE_8_TAPS; // set to USE_4_TAPS to test 4-tap filter. 
+ for (int i = 0; i < num_loops; ++i) + aom_highbd_dist_wtd_comp_avg_upsampled_pred_c( + nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output), + CONVERT_TO_BYTEPTR(pred8), in_w, in_h, sub_x_q3, sub_y_q3, + CONVERT_TO_BYTEPTR(ref8), in_w, bd, &dist_wtd_comp_params, + subpel_search); + + aom_usec_timer_mark(&timer); + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + printf("highbddistwtdcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w, + in_h, 1000.0 * elapsed_time / num_loops); + + aom_usec_timer timer1; + aom_usec_timer_start(&timer1); + + for (int i = 0; i < num_loops; ++i) + test_impl(nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output2), + CONVERT_TO_BYTEPTR(pred8), in_w, in_h, sub_x_q3, sub_y_q3, + CONVERT_TO_BYTEPTR(ref8), in_w, bd, &dist_wtd_comp_params, + subpel_search); + + aom_usec_timer_mark(&timer1); + const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); + printf("highbddistwtdcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w, + in_h, 1000.0 * elapsed_time1 / num_loops); + } + + libaom_test::ACMRandom rnd_; +}; // class AV1HighBDDISTWTDCOMPAVGUPSAMPLEDTest +#endif // CONFIG_AV1_HIGHBITDEPTH + +} // namespace AV1DISTWTDCOMPAVG +} // namespace libaom_test + +#endif // AOM_TEST_COMP_AVG_PRED_TEST_H_ diff --git a/third_party/aom/test/comp_mask_pred_test.cc b/third_party/aom/test/comp_mask_pred_test.cc new file mode 100644 index 0000000000..b65730aa57 --- /dev/null +++ b/third_party/aom/test/comp_mask_pred_test.cc @@ -0,0 +1,856 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <cstdlib> +#include <new> +#include <tuple> + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_dsp/variance.h" +#include "aom_mem/aom_mem.h" +#include "aom_ports/aom_timer.h" +#include "aom_ports/mem.h" +#include "av1/common/reconinter.h" +#include "av1/encoder/reconinter_enc.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { +typedef void (*comp_mask_pred_func)(uint8_t *comp_pred, const uint8_t *pred, + int width, int height, const uint8_t *ref, + int ref_stride, const uint8_t *mask, + int mask_stride, int invert_mask); + +typedef void (*comp_avg_pred_func)(uint8_t *comp_pred, const uint8_t *pred, + int width, int height, const uint8_t *ref, + int ref_stride); + +#if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON +const BLOCK_SIZE kCompMaskPredParams[] = { + BLOCK_8X8, BLOCK_8X16, BLOCK_8X32, BLOCK_16X8, BLOCK_16X16, + BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32 +}; +#endif + +class AV1CompMaskPredBase : public ::testing::Test { + public: + ~AV1CompMaskPredBase() override; + void SetUp() override; + + void TearDown() override; + + protected: + bool CheckResult(int width, int height) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + if (comp_pred1_[idx] != comp_pred2_[idx]) { + printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x); + printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]); + return false; + } + } + } + return true; + } + + libaom_test::ACMRandom rnd_; + uint8_t *comp_pred1_; + uint8_t *comp_pred2_; + uint8_t *pred_; + uint8_t *ref_buffer_; + uint8_t *ref_; +}; + 
+AV1CompMaskPredBase::~AV1CompMaskPredBase() = default; + +void AV1CompMaskPredBase::SetUp() { + rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); + av1_init_wedge_masks(); + comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); + ASSERT_NE(comp_pred1_, nullptr); + comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); + ASSERT_NE(comp_pred2_, nullptr); + pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); + ASSERT_NE(pred_, nullptr); + // The biggest block size is MAX_SB_SQUARE(128*128), however for the + // convolution we need to access 3 bytes before and 4 bytes after (for an + // 8-tap filter), in both directions, so we need to allocate + // (128 + 7) * (128 + 7) = MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49 + ref_buffer_ = + (uint8_t *)aom_memalign(16, MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49); + ASSERT_NE(ref_buffer_, nullptr); + // Start of the actual block where the convolution will be computed + ref_ = ref_buffer_ + (3 * MAX_SB_SIZE + 3); + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pred_[i] = rnd_.Rand8(); + } + for (int i = 0; i < MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49; ++i) { + ref_buffer_[i] = rnd_.Rand8(); + } +} + +void AV1CompMaskPredBase::TearDown() { + aom_free(comp_pred1_); + aom_free(comp_pred2_); + aom_free(pred_); + aom_free(ref_buffer_); +} + +typedef std::tuple<comp_mask_pred_func, BLOCK_SIZE> CompMaskPredParam; + +class AV1CompMaskPredTest + : public AV1CompMaskPredBase, + public ::testing::WithParamInterface<CompMaskPredParam> { + protected: + void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv); + void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize); +}; + +void AV1CompMaskPredTest::RunCheckOutput(comp_mask_pred_func test_impl, + BLOCK_SIZE bsize, int inv) { + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int wedge_types = get_wedge_types_lookup(bsize); + for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { + const uint8_t *mask = 
av1_get_contiguous_soft_mask(wedge_index, 1, bsize); + + aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, + inv); + test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv); + + ASSERT_EQ(CheckResult(w, h), true) + << " wedge " << wedge_index << " inv " << inv; + } +} + +void AV1CompMaskPredTest::RunSpeedTest(comp_mask_pred_func test_impl, + BLOCK_SIZE bsize) { + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int wedge_types = get_wedge_types_lookup(bsize); + int wedge_index = wedge_types / 2; + const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize); + const int num_loops = 1000000000 / (w + h); + + comp_mask_pred_func funcs[2] = { aom_comp_mask_pred_c, test_impl }; + double elapsed_time[2] = { 0 }; + for (int i = 0; i < 2; ++i) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + comp_mask_pred_func func = funcs[i]; + for (int j = 0; j < num_loops; ++j) { + func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 0); + } + aom_usec_timer_mark(&timer); + double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); + elapsed_time[i] = 1000.0 * time / num_loops; + } + printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0], + elapsed_time[1]); + printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); +} + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompMaskPredTest); + +TEST_P(AV1CompMaskPredTest, CheckOutput) { + // inv = 0, 1 + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1); +} + +TEST_P(AV1CompMaskPredTest, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(0), GET_PARAM(1)); +} + +#if HAVE_SSSE3 +INSTANTIATE_TEST_SUITE_P( + SSSE3, AV1CompMaskPredTest, + ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3), + ::testing::ValuesIn(kCompMaskPredParams))); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1CompMaskPredTest, + 
::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2), + ::testing::ValuesIn(kCompMaskPredParams))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1CompMaskPredTest, + ::testing::Combine(::testing::Values(&aom_comp_mask_pred_neon), + ::testing::ValuesIn(kCompMaskPredParams))); +#endif + +#if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON +const BLOCK_SIZE kValidBlockSize[] = { + BLOCK_4X4, BLOCK_8X8, BLOCK_8X16, BLOCK_8X32, BLOCK_16X8, + BLOCK_16X16, BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32, + BLOCK_32X64, BLOCK_64X32, BLOCK_64X64, BLOCK_64X128, BLOCK_128X64, + BLOCK_128X128, BLOCK_16X64, BLOCK_64X16 +}; +#endif + +typedef void (*upsampled_pred_func)(MACROBLOCKD *xd, const AV1_COMMON *const cm, + int mi_row, int mi_col, const MV *const mv, + uint8_t *comp_pred, int width, int height, + int subpel_x_q3, int subpel_y_q3, + const uint8_t *ref, int ref_stride, + int subpel_search); + +typedef std::tuple<upsampled_pred_func, BLOCK_SIZE> UpsampledPredParam; + +class AV1UpsampledPredTest + : public AV1CompMaskPredBase, + public ::testing::WithParamInterface<UpsampledPredParam> { + protected: + void RunCheckOutput(upsampled_pred_func test_impl, BLOCK_SIZE bsize); + void RunSpeedTest(upsampled_pred_func test_impl, BLOCK_SIZE bsize, + int havSub); +}; + +void AV1UpsampledPredTest::RunCheckOutput(upsampled_pred_func test_impl, + BLOCK_SIZE bsize) { + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + for (int subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS; + ++subpel_search) { + // loop through subx and suby + for (int sub = 0; sub < 8 * 8; ++sub) { + int subx = sub & 0x7; + int suby = (sub >> 3); + + aom_upsampled_pred_c(nullptr, nullptr, 0, 0, nullptr, comp_pred1_, w, h, + subx, suby, ref_, MAX_SB_SIZE, subpel_search); + + test_impl(nullptr, nullptr, 0, 0, nullptr, comp_pred2_, w, h, subx, suby, + ref_, MAX_SB_SIZE, subpel_search); + ASSERT_EQ(CheckResult(w, h), true) + << "sub (" << subx << "," << 
suby << ")"; + } + } +} + +void AV1UpsampledPredTest::RunSpeedTest(upsampled_pred_func test_impl, + BLOCK_SIZE bsize, int havSub) { + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int subx = havSub ? 3 : 0; + const int suby = havSub ? 4 : 0; + + const int num_loops = 1000000000 / (w + h); + upsampled_pred_func funcs[2] = { aom_upsampled_pred_c, test_impl }; + double elapsed_time[2] = { 0 }; + int subpel_search = USE_8_TAPS; // set to USE_4_TAPS to test 4-tap filter. + for (int i = 0; i < 2; ++i) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + upsampled_pred_func func = funcs[i]; + for (int j = 0; j < num_loops; ++j) { + func(nullptr, nullptr, 0, 0, nullptr, comp_pred1_, w, h, subx, suby, ref_, + MAX_SB_SIZE, subpel_search); + } + aom_usec_timer_mark(&timer); + double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); + elapsed_time[i] = 1000.0 * time / num_loops; + } + printf("UpsampledPred[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, + elapsed_time[0], elapsed_time[1]); + printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); +} + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1UpsampledPredTest); + +TEST_P(AV1UpsampledPredTest, CheckOutput) { + RunCheckOutput(GET_PARAM(0), GET_PARAM(1)); +} + +TEST_P(AV1UpsampledPredTest, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1); +} + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, AV1UpsampledPredTest, + ::testing::Combine(::testing::Values(&aom_upsampled_pred_sse2), + ::testing::ValuesIn(kValidBlockSize))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1UpsampledPredTest, + ::testing::Combine(::testing::Values(&aom_upsampled_pred_neon), + ::testing::ValuesIn(kValidBlockSize))); +#endif + +typedef std::tuple<comp_avg_pred_func, BLOCK_SIZE> CompAvgPredParam; + +class AV1CompAvgPredTest : public ::testing::TestWithParam<CompAvgPredParam> { + public: + ~AV1CompAvgPredTest() override; + void SetUp() override; + + void TearDown() 
override; + + protected: + void RunCheckOutput(comp_avg_pred_func test_impl, BLOCK_SIZE bsize); + void RunSpeedTest(comp_avg_pred_func test_impl, BLOCK_SIZE bsize); + bool CheckResult(int width, int height) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + if (comp_pred1_[idx] != comp_pred2_[idx]) { + printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, x, y); + printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]); + return false; + } + } + } + return true; + } + + libaom_test::ACMRandom rnd_; + uint8_t *comp_pred1_; + uint8_t *comp_pred2_; + uint8_t *pred_; + uint8_t *ref_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompAvgPredTest); + +AV1CompAvgPredTest::~AV1CompAvgPredTest() = default; + +void AV1CompAvgPredTest::SetUp() { + rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); + + comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); + ASSERT_NE(comp_pred1_, nullptr); + comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); + ASSERT_NE(comp_pred2_, nullptr); + pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); + ASSERT_NE(pred_, nullptr); + ref_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); + ASSERT_NE(ref_, nullptr); + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pred_[i] = rnd_.Rand8(); + } + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + ref_[i] = rnd_.Rand8(); + } +} + +void AV1CompAvgPredTest::TearDown() { + aom_free(comp_pred1_); + aom_free(comp_pred2_); + aom_free(pred_); + aom_free(ref_); +} + +void AV1CompAvgPredTest::RunCheckOutput(comp_avg_pred_func test_impl, + BLOCK_SIZE bsize) { + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + aom_comp_avg_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE); + test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE); + + ASSERT_EQ(CheckResult(w, h), true); +} + +void AV1CompAvgPredTest::RunSpeedTest(comp_avg_pred_func test_impl, + BLOCK_SIZE bsize) { + const int w = block_size_wide[bsize]; + const 
int h = block_size_high[bsize]; + const int num_loops = 1000000000 / (w + h); + + comp_avg_pred_func functions[2] = { aom_comp_avg_pred_c, test_impl }; + double elapsed_time[2] = { 0.0 }; + for (int i = 0; i < 2; ++i) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + comp_avg_pred_func func = functions[i]; + for (int j = 0; j < num_loops; ++j) { + func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE); + } + aom_usec_timer_mark(&timer); + const double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); + elapsed_time[i] = 1000.0 * time; + } + printf("CompAvgPred %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0], + elapsed_time[1]); + printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); +} + +TEST_P(AV1CompAvgPredTest, CheckOutput) { + RunCheckOutput(GET_PARAM(0), GET_PARAM(1)); +} + +TEST_P(AV1CompAvgPredTest, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(0), GET_PARAM(1)); +} + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1CompAvgPredTest, + ::testing::Combine(::testing::Values(&aom_comp_avg_pred_avx2), + ::testing::ValuesIn(kValidBlockSize))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1CompAvgPredTest, + ::testing::Combine(::testing::Values(&aom_comp_avg_pred_neon), + ::testing::ValuesIn(kValidBlockSize))); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +class AV1HighbdCompMaskPredTestBase : public ::testing::Test { + public: + ~AV1HighbdCompMaskPredTestBase() override; + void SetUp() override; + + void TearDown() override; + + protected: + bool CheckResult(int width, int height) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + if (comp_pred1_[idx] != comp_pred2_[idx]) { + printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x); + printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]); + return false; + } + } + } + return true; + } + + libaom_test::ACMRandom rnd_; + uint16_t *comp_pred1_; + uint16_t *comp_pred2_; + uint16_t *pred_; + uint16_t *ref_buffer_; + 
uint16_t *ref_; +}; + +AV1HighbdCompMaskPredTestBase::~AV1HighbdCompMaskPredTestBase() = default; + +void AV1HighbdCompMaskPredTestBase::SetUp() { + rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); + av1_init_wedge_masks(); + + comp_pred1_ = + (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_)); + ASSERT_NE(comp_pred1_, nullptr); + comp_pred2_ = + (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_)); + ASSERT_NE(comp_pred2_, nullptr); + pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_)); + ASSERT_NE(pred_, nullptr); + // The biggest block size is MAX_SB_SQUARE(128*128), however for the + // convolution we need to access 3 elements before and 4 elements after (for + // an 8-tap filter), in both directions, so we need to allocate (128 + 7) * + // (128 + 7) = (MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49) * + // sizeof(*ref_buffer_) + ref_buffer_ = (uint16_t *)aom_memalign( + 16, (MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49) * sizeof(*ref_buffer_)); + ASSERT_NE(ref_buffer_, nullptr); + // Start of the actual block where the convolution will be computed + ref_ = ref_buffer_ + (3 * MAX_SB_SIZE + 3); +} + +void AV1HighbdCompMaskPredTestBase::TearDown() { + aom_free(comp_pred1_); + aom_free(comp_pred2_); + aom_free(pred_); + aom_free(ref_buffer_); +} + +typedef void (*highbd_comp_mask_pred_func)(uint8_t *comp_pred8, + const uint8_t *pred8, int width, + int height, const uint8_t *ref8, + int ref_stride, const uint8_t *mask, + int mask_stride, int invert_mask); + +typedef std::tuple<highbd_comp_mask_pred_func, BLOCK_SIZE, int> + HighbdCompMaskPredParam; + +class AV1HighbdCompMaskPredTest + : public AV1HighbdCompMaskPredTestBase, + public ::testing::WithParamInterface<HighbdCompMaskPredParam> { + public: + ~AV1HighbdCompMaskPredTest() override; + + protected: + void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv); + void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize); +}; + 
+AV1HighbdCompMaskPredTest::~AV1HighbdCompMaskPredTest() = default; + +void AV1HighbdCompMaskPredTest::RunCheckOutput( + highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) { + int bd_ = GET_PARAM(2); + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int wedge_types = get_wedge_types_lookup(bsize); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1); + } + for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) { + ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1); + } + + for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { + const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize); + + aom_highbd_comp_mask_pred_c( + CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h, + CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv); + + test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h, + CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv); + + ASSERT_EQ(CheckResult(w, h), true) + << " wedge " << wedge_index << " inv " << inv; + } +} + +void AV1HighbdCompMaskPredTest::RunSpeedTest( + highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize) { + int bd_ = GET_PARAM(2); + + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int wedge_types = get_wedge_types_lookup(bsize); + int wedge_index = wedge_types / 2; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1); + } + for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) { + ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1); + } + + const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize); + const int num_loops = 1000000000 / (w + h); + + highbd_comp_mask_pred_func funcs[2] = { aom_highbd_comp_mask_pred_c, + test_impl }; + double elapsed_time[2] = { 0 }; + for (int i = 0; i < 2; ++i) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + 
highbd_comp_mask_pred_func func = funcs[i]; + for (int j = 0; j < num_loops; ++j) { + func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h, + CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0); + } + aom_usec_timer_mark(&timer); + double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); + elapsed_time[i] = 1000.0 * time / num_loops; + } + printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0], + elapsed_time[1]); + printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); +} + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompMaskPredTest); + +TEST_P(AV1HighbdCompMaskPredTest, CheckOutput) { + // inv = 0, 1 + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0); + RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1); +} + +TEST_P(AV1HighbdCompMaskPredTest, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(0), GET_PARAM(1)); +} + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1HighbdCompMaskPredTest, + ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_neon), + ::testing::ValuesIn(kCompMaskPredParams), + ::testing::Range(8, 13, 2))); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1HighbdCompMaskPredTest, + ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_avx2), + ::testing::ValuesIn(kCompMaskPredParams), + ::testing::Range(8, 13, 2))); +#endif + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, AV1HighbdCompMaskPredTest, + ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_sse2), + ::testing::ValuesIn(kCompMaskPredParams), + ::testing::Range(8, 13, 2))); +#endif + +typedef void (*highbd_upsampled_pred_func)( + MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, + const MV *const mv, uint8_t *comp_pred8, int width, int height, + int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride, + int bd, int subpel_search); + +typedef std::tuple<highbd_upsampled_pred_func, BLOCK_SIZE, int> + HighbdUpsampledPredParam; + +class AV1HighbdUpsampledPredTest + : 
public AV1HighbdCompMaskPredTestBase, + public ::testing::WithParamInterface<HighbdUpsampledPredParam> { + public: + ~AV1HighbdUpsampledPredTest() override; + + protected: + void RunCheckOutput(highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize); + void RunSpeedTest(highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize, + int havSub); +}; + +AV1HighbdUpsampledPredTest::~AV1HighbdUpsampledPredTest() = default; + +void AV1HighbdUpsampledPredTest::RunCheckOutput( + highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize) { + int bd_ = GET_PARAM(2); + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1); + } + for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) { + ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1); + } + + for (int subpel_search = 1; subpel_search <= 2; ++subpel_search) { + // loop through subx and suby + for (int sub = 0; sub < 8 * 8; ++sub) { + int subx = sub & 0x7; + int suby = (sub >> 3); + + aom_highbd_upsampled_pred_c(nullptr, nullptr, 0, 0, nullptr, + CONVERT_TO_BYTEPTR(comp_pred1_), w, h, subx, + suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, + bd_, subpel_search); + + test_impl(nullptr, nullptr, 0, 0, nullptr, + CONVERT_TO_BYTEPTR(comp_pred2_), w, h, subx, suby, + CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search); + + ASSERT_EQ(CheckResult(w, h), true) + << "sub (" << subx << "," << suby << ")"; + } + } +} + +void AV1HighbdUpsampledPredTest::RunSpeedTest( + highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize, int havSub) { + int bd_ = GET_PARAM(2); + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int subx = havSub ? 3 : 0; + const int suby = havSub ? 
4 : 0; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1); + } + for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) { + ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1); + } + + const int num_loops = 1000000000 / (w + h); + highbd_upsampled_pred_func funcs[2] = { &aom_highbd_upsampled_pred_c, + test_impl }; + double elapsed_time[2] = { 0 }; + for (int i = 0; i < 2; ++i) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + highbd_upsampled_pred_func func = funcs[i]; + int subpel_search = 2; // set to 1 to test 4-tap filter. + for (int j = 0; j < num_loops; ++j) { + func(nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(comp_pred1_), w, + h, subx, suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, + subpel_search); + } + aom_usec_timer_mark(&timer); + double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); + elapsed_time[i] = 1000.0 * time / num_loops; + } + printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0], + elapsed_time[1]); + printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); +} + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdUpsampledPredTest); + +TEST_P(AV1HighbdUpsampledPredTest, CheckOutput) { + RunCheckOutput(GET_PARAM(0), GET_PARAM(1)); +} + +TEST_P(AV1HighbdUpsampledPredTest, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1); +} + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, AV1HighbdUpsampledPredTest, + ::testing::Combine(::testing::Values(&aom_highbd_upsampled_pred_sse2), + ::testing::ValuesIn(kValidBlockSize), + ::testing::Range(8, 13, 2))); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1HighbdUpsampledPredTest, + ::testing::Combine(::testing::Values(&aom_highbd_upsampled_pred_neon), + ::testing::ValuesIn(kValidBlockSize), + ::testing::Range(8, 13, 2))); +#endif + +typedef void (*highbd_comp_avg_pred_func)(uint8_t *comp_pred, + const uint8_t *pred, int width, + int height, const uint8_t *ref, + int 
ref_stride); + +typedef std::tuple<highbd_comp_avg_pred_func, BLOCK_SIZE, int> + HighbdCompAvgPredParam; + +class AV1HighbdCompAvgPredTest + : public ::testing::TestWithParam<HighbdCompAvgPredParam> { + public: + ~AV1HighbdCompAvgPredTest() override; + void SetUp() override; + + protected: + void RunCheckOutput(highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize); + void RunSpeedTest(highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize); + bool CheckResult(int width, int height) const { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + if (comp_pred1_[idx] != comp_pred2_[idx]) { + printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, x, y); + printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]); + return false; + } + } + } + return true; + } + + libaom_test::ACMRandom rnd_; + uint16_t *comp_pred1_; + uint16_t *comp_pred2_; + uint16_t *pred_; + uint16_t *ref_; +}; + +AV1HighbdCompAvgPredTest::~AV1HighbdCompAvgPredTest() { + aom_free(comp_pred1_); + aom_free(comp_pred2_); + aom_free(pred_); + aom_free(ref_); +} + +void AV1HighbdCompAvgPredTest::SetUp() { + int bd_ = GET_PARAM(2); + rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); + + comp_pred1_ = + (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_)); + ASSERT_NE(comp_pred1_, nullptr); + comp_pred2_ = + (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_)); + ASSERT_NE(comp_pred2_, nullptr); + pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_)); + ASSERT_NE(pred_, nullptr); + ref_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*ref_)); + ASSERT_NE(ref_, nullptr); + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1); + } + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + ref_[i] = rnd_.Rand16() & ((1 << bd_) - 1); + } +} + +void AV1HighbdCompAvgPredTest::RunCheckOutput( + highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize) { + const int w = 
block_size_wide[bsize]; + const int h = block_size_high[bsize]; + aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(comp_pred1_), + CONVERT_TO_BYTEPTR(pred_), w, h, + CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE); + test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h, + CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE); + + ASSERT_EQ(CheckResult(w, h), true); +} + +void AV1HighbdCompAvgPredTest::RunSpeedTest(highbd_comp_avg_pred_func test_impl, + BLOCK_SIZE bsize) { + const int w = block_size_wide[bsize]; + const int h = block_size_high[bsize]; + const int num_loops = 1000000000 / (w + h); + + highbd_comp_avg_pred_func functions[2] = { aom_highbd_comp_avg_pred_c, + test_impl }; + double elapsed_time[2] = { 0.0 }; + for (int i = 0; i < 2; ++i) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + highbd_comp_avg_pred_func func = functions[i]; + for (int j = 0; j < num_loops; ++j) { + func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h, + CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE); + } + aom_usec_timer_mark(&timer); + const double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); + elapsed_time[i] = 1000.0 * time; + } + printf("HighbdCompAvg %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0], + elapsed_time[1]); + printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); +} + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompAvgPredTest); + +TEST_P(AV1HighbdCompAvgPredTest, CheckOutput) { + RunCheckOutput(GET_PARAM(0), GET_PARAM(1)); +} + +TEST_P(AV1HighbdCompAvgPredTest, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(0), GET_PARAM(1)); +} + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1HighbdCompAvgPredTest, + ::testing::Combine(::testing::Values(&aom_highbd_comp_avg_pred_neon), + ::testing::ValuesIn(kValidBlockSize), + ::testing::Range(8, 13, 2))); +#endif + +#endif // CONFIG_AV1_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/convolve_test.cc b/third_party/aom/test/convolve_test.cc new file mode 100644 index 
0000000000..c97f814057 --- /dev/null +++ b/third_party/aom/test/convolve_test.cc @@ -0,0 +1,922 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <string.h> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom_dsp/aom_dsp_common.h" +#include "aom_dsp/aom_filter.h" +#include "aom_mem/aom_mem.h" +#include "aom_ports/aom_timer.h" +#include "aom_ports/mem.h" +#include "av1/common/filter.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +namespace { + +static const unsigned int kMaxDimension = MAX_SB_SIZE; + +static const int16_t kInvalidFilter[8] = {}; +static const int kNumFilterBanks = SWITCHABLE_FILTERS; +static const int kNumFilters = 16; + +typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h); + +struct ConvolveFunctions { + ConvolveFunctions(ConvolveFunc h8, ConvolveFunc v8, int bd) + : h8_(h8), v8_(v8), use_highbd_(bd) {} + + ConvolveFunc h8_; + ConvolveFunc v8_; + int use_highbd_; // 0 if high bitdepth not used, else the actual bit depth. 
// Clamps `x` to the 8-bit pixel range [0, 255].
uint8_t clip_pixel(int x) {
  if (x < 0) return 0;
  if (x > 255) return 255;
  return static_cast<uint8_t>(x);
}
+ const int kInterp_Extend = 4; + const unsigned int intermediate_height = + (kInterp_Extend - 1) + output_height + kInterp_Extend; + unsigned int i, j; + + assert(intermediate_height > 7); + + // Size of intermediate_buffer is max_intermediate_height * filter_max_width, + // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height + // + kInterp_Extend + // = 3 + 16 + 4 + // = 23 + // and filter_max_width = 16 + // + uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension]; + const int intermediate_next_stride = + 1 - static_cast<int>(intermediate_height * output_width); + + // Horizontal pass (src -> transposed intermediate). + uint8_t *output_ptr = intermediate_buffer; + const int src_next_row_stride = src_stride - output_width; + src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); + for (i = 0; i < intermediate_height; ++i) { + for (j = 0; j < output_width; ++j) { + // Apply filter... + const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) + + (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) + + (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) + + (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) + + (AV1_FILTER_WEIGHT >> 1); // Rounding + + // Normalize back to 0-255... + *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT); + ++src_ptr; + output_ptr += intermediate_height; + } + src_ptr += src_next_row_stride; + output_ptr += intermediate_next_stride; + } + + // Vertical pass (transposed intermediate -> dst). + src_ptr = intermediate_buffer; + const int dst_next_row_stride = dst_stride - output_width; + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + // Apply filter... 
// Averages `src` into `output_ptr` in place, with rounding:
//   out[r][c] = (out[r][c] + src[r][c] + 1) >> 1
// for an `output_width` x `output_height` block. Each buffer is addressed
// with its own row stride, given in elements.
void block2d_average_c(uint8_t *src, unsigned int src_stride,
                       uint8_t *output_ptr, unsigned int output_stride,
                       unsigned int output_width, unsigned int output_height) {
  for (unsigned int row = 0; row < output_height; ++row) {
    const uint8_t *const src_row = src + row * src_stride;
    uint8_t *const dst_row = output_ptr + row * output_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      dst_row[col] = (dst_row[col] + src_row[col] + 1) >> 1;
    }
  }
}
+ const int kInterp_Extend = 4; + const unsigned int intermediate_height = + (kInterp_Extend - 1) + output_height + kInterp_Extend; + + /* Size of intermediate_buffer is max_intermediate_height * filter_max_width, + * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height + * + kInterp_Extend + * = 3 + 16 + 4 + * = 23 + * and filter_max_width = 16 + */ + uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 }; + const int intermediate_next_stride = + 1 - static_cast<int>(intermediate_height * output_width); + + // Horizontal pass (src -> transposed intermediate). + { + uint16_t *output_ptr = intermediate_buffer; + const int src_next_row_stride = src_stride - output_width; + unsigned int i, j; + src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); + for (i = 0; i < intermediate_height; ++i) { + for (j = 0; j < output_width; ++j) { + // Apply filter... + const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) + + (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) + + (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) + + (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) + + (AV1_FILTER_WEIGHT >> 1); // Rounding + + // Normalize back to 0-255... + *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd); + ++src_ptr; + output_ptr += intermediate_height; + } + src_ptr += src_next_row_stride; + output_ptr += intermediate_next_stride; + } + } + + // Vertical pass (transposed intermediate -> dst). + { + const uint16_t *interm_ptr = intermediate_buffer; + const int dst_next_row_stride = dst_stride - output_width; + unsigned int i, j; + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + // Apply filter... 
+ const int temp = + (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) + + (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) + + (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) + + (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) + + (AV1_FILTER_WEIGHT >> 1); // Rounding + + // Normalize back to 0-255... + *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd); + interm_ptr += intermediate_height; + } + interm_ptr += intermediate_next_stride; + dst_ptr += dst_next_row_stride; + } + } +} + +void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride, + uint16_t *output_ptr, unsigned int output_stride, + unsigned int output_width, + unsigned int output_height) { + unsigned int i, j; + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; + } + output_ptr += output_stride; + } +} + +void highbd_filter_average_block2d_8_c( + const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter, + const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride, + unsigned int output_width, unsigned int output_height, int bd) { + uint16_t tmp[kMaxDimension * kMaxDimension]; + + assert(output_width <= kMaxDimension); + assert(output_height <= kMaxDimension); + highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, + kMaxDimension, output_width, output_height, bd); + highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, + output_width, output_height); +} + +class ConvolveTestBase : public ::testing::TestWithParam<ConvolveParam> { + public: + static void SetUpTestSuite() { + // Force input_ to be unaligned, output to be 16 byte aligned. 
+ input_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kInputBufferSize + 1)) + + 1; + ASSERT_NE(input_, nullptr); + ref8_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kOutputStride * kMaxDimension)); + ASSERT_NE(ref8_, nullptr); + output_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kOutputBufferSize)); + ASSERT_NE(output_, nullptr); + output_ref_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kOutputBufferSize)); + ASSERT_NE(output_ref_, nullptr); + input16_ = reinterpret_cast<uint16_t *>(aom_memalign( + kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) + + 1; + ASSERT_NE(input16_, nullptr); + ref16_ = reinterpret_cast<uint16_t *>(aom_memalign( + kDataAlignment, kOutputStride * kMaxDimension * sizeof(uint16_t))); + ASSERT_NE(ref16_, nullptr); + output16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); + ASSERT_NE(output16_, nullptr); + output16_ref_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); + ASSERT_NE(output16_ref_, nullptr); + } + + static void TearDownTestSuite() { + aom_free(input_ - 1); + input_ = nullptr; + aom_free(ref8_); + ref8_ = nullptr; + aom_free(output_); + output_ = nullptr; + aom_free(output_ref_); + output_ref_ = nullptr; + aom_free(input16_ - 1); + input16_ = nullptr; + aom_free(ref16_); + ref16_ = nullptr; + aom_free(output16_); + output16_ = nullptr; + aom_free(output16_ref_); + output16_ref_ = nullptr; + } + + protected: + static const int kDataAlignment = 16; + static const int kOuterBlockSize = 4 * kMaxDimension; + static const int kInputStride = kOuterBlockSize; + static const int kOutputStride = kOuterBlockSize; + static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize; + static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize; + + int Width() const { return GET_PARAM(0); } + int Height() const { return GET_PARAM(1); } 
+ int BorderLeft() const { + const int center = (kOuterBlockSize - Width()) / 2; + return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1); + } + int BorderTop() const { return (kOuterBlockSize - Height()) / 2; } + + bool IsIndexInBorder(int i) { + return (i < BorderTop() * kOuterBlockSize || + i >= (BorderTop() + Height()) * kOuterBlockSize || + i % kOuterBlockSize < BorderLeft() || + i % kOuterBlockSize >= (BorderLeft() + Width())); + } + + void SetUp() override { + UUT_ = GET_PARAM(2); + if (UUT_->use_highbd_ != 0) + mask_ = (1 << UUT_->use_highbd_) - 1; + else + mask_ = 255; + /* Set up guard blocks for an inner block centered in the outer block */ + for (int i = 0; i < kOutputBufferSize; ++i) { + if (IsIndexInBorder(i)) { + output_[i] = 255; + output16_[i] = mask_; + } else { + output_[i] = 0; + output16_[i] = 0; + } + } + + ::libaom_test::ACMRandom prng; + for (int i = 0; i < kInputBufferSize; ++i) { + if (i & 1) { + input_[i] = 255; + input16_[i] = mask_; + } else { + input_[i] = prng.Rand8Extremes(); + input16_[i] = prng.Rand16() & mask_; + } + } + } + + void SetConstantInput(int value) { + memset(input_, value, kInputBufferSize); + aom_memset16(input16_, value, kInputBufferSize); + } + + void CopyOutputToRef() { + memcpy(output_ref_, output_, kOutputBufferSize); + // Copy 16-bit pixels values. The effective number of bytes is double. 
+ memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize); + } + + void CheckGuardBlocks() { + for (int i = 0; i < kOutputBufferSize; ++i) { + if (IsIndexInBorder(i)) { + EXPECT_EQ(255, output_[i]); + } + } + } + + uint8_t *input() const { + const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); + if (UUT_->use_highbd_ == 0) { + return input_ + offset; + } else { + return CONVERT_TO_BYTEPTR(input16_) + offset; + } + } + + uint8_t *output() const { + const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); + if (UUT_->use_highbd_ == 0) { + return output_ + offset; + } else { + return CONVERT_TO_BYTEPTR(output16_) + offset; + } + } + + uint8_t *output_ref() const { + const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); + if (UUT_->use_highbd_ == 0) { + return output_ref_ + offset; + } else { + return CONVERT_TO_BYTEPTR(output16_ref_) + offset; + } + } + + uint16_t lookup(uint8_t *list, int index) const { + if (UUT_->use_highbd_ == 0) { + return list[index]; + } else { + return CONVERT_TO_SHORTPTR(list)[index]; + } + } + + void assign_val(uint8_t *list, int index, uint16_t val) const { + if (UUT_->use_highbd_ == 0) { + list[index] = (uint8_t)val; + } else { + CONVERT_TO_SHORTPTR(list)[index] = val; + } + } + + void wrapper_filter_average_block2d_8_c( + const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter, + const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride, + unsigned int output_width, unsigned int output_height) { + if (UUT_->use_highbd_ == 0) { + filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr, + dst_stride, output_width, output_height); + } else { + highbd_filter_average_block2d_8_c( + CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter, + CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height, + UUT_->use_highbd_); + } + } + + void wrapper_filter_block2d_8_c( + const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter, + 
const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride, + unsigned int output_width, unsigned int output_height) { + if (UUT_->use_highbd_ == 0) { + filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr, + dst_stride, output_width, output_height); + } else { + highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, + HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr), + dst_stride, output_width, output_height, + UUT_->use_highbd_); + } + } + + void MatchesReferenceSubpixelFilter() { + uint8_t *const in = input(); + uint8_t *const out = output(); + uint8_t *ref; + if (UUT_->use_highbd_ == 0) { + ref = ref8_; + } else { + ref = CONVERT_TO_BYTEPTR(ref16_); + } + int subpel_search; + for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS; + ++subpel_search) { + for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { + const InterpFilter filter = (InterpFilter)filter_bank; + const InterpKernel *filters = + (const InterpKernel *)av1_get_interp_filter_kernel(filter, + subpel_search); + for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { + for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { + wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x], + filters[filter_y], ref, kOutputStride, + Width(), Height()); + + if (filter_x && filter_y) + continue; + else if (filter_y) + UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter, + 16, filters[filter_y], 16, Width(), Height()); + else if (filter_x) + API_REGISTER_STATE_CHECK(UUT_->h8_( + in, kInputStride, out, kOutputStride, filters[filter_x], 16, + kInvalidFilter, 16, Width(), Height())); + else + continue; + + CheckGuardBlocks(); + + for (int y = 0; y < Height(); ++y) + for (int x = 0; x < Width(); ++x) + ASSERT_EQ(lookup(ref, y * kOutputStride + x), + lookup(out, y * kOutputStride + x)) + << "mismatch at (" << x << "," << y << "), " + << "filters (" << filter_bank << "," << filter_x << "," + << filter_y << ")"; + } + } + } + } + } + + 
void FilterExtremes() { + uint8_t *const in = input(); + uint8_t *const out = output(); + uint8_t *ref; + if (UUT_->use_highbd_ == 0) { + ref = ref8_; + } else { + ref = CONVERT_TO_BYTEPTR(ref16_); + } + + // Populate ref and out with some random data + ::libaom_test::ACMRandom prng; + for (int y = 0; y < Height(); ++y) { + for (int x = 0; x < Width(); ++x) { + uint16_t r; + if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { + r = prng.Rand8Extremes(); + } else { + r = prng.Rand16() & mask_; + } + assign_val(out, y * kOutputStride + x, r); + assign_val(ref, y * kOutputStride + x, r); + } + } + + for (int axis = 0; axis < 2; axis++) { + int seed_val = 0; + while (seed_val < 256) { + for (int y = 0; y < 8; ++y) { + for (int x = 0; x < 8; ++x) { + assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, + ((seed_val >> (axis ? y : x)) & 1) * mask_); + if (axis) seed_val++; + } + if (axis) + seed_val -= 8; + else + seed_val++; + } + if (axis) seed_val += 8; + int subpel_search; + for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS; + ++subpel_search) { + for (int filter_bank = 0; filter_bank < kNumFilterBanks; + ++filter_bank) { + const InterpFilter filter = (InterpFilter)filter_bank; + const InterpKernel *filters = + (const InterpKernel *)av1_get_interp_filter_kernel( + filter, subpel_search); + for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { + for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { + wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x], + filters[filter_y], ref, + kOutputStride, Width(), Height()); + if (filter_x && filter_y) + continue; + else if (filter_y) + API_REGISTER_STATE_CHECK(UUT_->v8_( + in, kInputStride, out, kOutputStride, kInvalidFilter, 16, + filters[filter_y], 16, Width(), Height())); + else if (filter_x) + API_REGISTER_STATE_CHECK(UUT_->h8_( + in, kInputStride, out, kOutputStride, filters[filter_x], + 16, kInvalidFilter, 16, Width(), Height())); + else + continue; + + for (int y = 0; y < 
Height(); ++y) + for (int x = 0; x < Width(); ++x) + ASSERT_EQ(lookup(ref, y * kOutputStride + x), + lookup(out, y * kOutputStride + x)) + << "mismatch at (" << x << "," << y << "), " + << "filters (" << filter_bank << "," << filter_x << "," + << filter_y << ")"; + } + } + } + } + } + } + } + + void SpeedTest() { + uint8_t *const in = input(); + uint8_t *const out = output(); + uint8_t *ref; + if (UUT_->use_highbd_ == 0) { + ref = ref8_; + } else { + ref = CONVERT_TO_BYTEPTR(ref16_); + } + + // Populate ref and out with some random data + ::libaom_test::ACMRandom prng; + for (int y = 0; y < Height(); ++y) { + for (int x = 0; x < Width(); ++x) { + uint16_t r; + if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { + r = prng.Rand8Extremes(); + } else { + r = prng.Rand16() & mask_; + } + assign_val(out, y * kOutputStride + x, r); + assign_val(ref, y * kOutputStride + x, r); + } + } + + InterpFilter filter = (InterpFilter)1; + const InterpKernel *filters = + (const InterpKernel *)av1_get_interp_filter_kernel(filter, USE_8_TAPS); + wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1], + out, kOutputStride, Width(), Height()); + + aom_usec_timer timer; + int tests_num = 1000; + + aom_usec_timer_start(&timer); + while (tests_num > 0) { + for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { + filter = (InterpFilter)filter_bank; + filters = (const InterpKernel *)av1_get_interp_filter_kernel( + filter, USE_8_TAPS); + for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { + for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { + if (filter_x && filter_y) continue; + if (filter_y) + API_REGISTER_STATE_CHECK(UUT_->v8_( + in, kInputStride, out, kOutputStride, kInvalidFilter, 16, + filters[filter_y], 16, Width(), Height())); + else if (filter_x) + API_REGISTER_STATE_CHECK(UUT_->h8_( + in, kInputStride, out, kOutputStride, filters[filter_x], 16, + kInvalidFilter, 16, Width(), Height())); + } + } + } + tests_num--; + } + 
aom_usec_timer_mark(&timer); + + const int elapsed_time = + static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000); + printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(), + UUT_->use_highbd_, elapsed_time); + } + + const ConvolveFunctions *UUT_; + static uint8_t *input_; + static uint8_t *ref8_; + static uint8_t *output_; + static uint8_t *output_ref_; + static uint16_t *input16_; + static uint16_t *ref16_; + static uint16_t *output16_; + static uint16_t *output16_ref_; + int mask_; +}; + +uint8_t *ConvolveTestBase::input_ = nullptr; +uint8_t *ConvolveTestBase::ref8_ = nullptr; +uint8_t *ConvolveTestBase::output_ = nullptr; +uint8_t *ConvolveTestBase::output_ref_ = nullptr; +uint16_t *ConvolveTestBase::input16_ = nullptr; +uint16_t *ConvolveTestBase::ref16_ = nullptr; +uint16_t *ConvolveTestBase::output16_ = nullptr; +uint16_t *ConvolveTestBase::output16_ref_ = nullptr; + +using LowbdConvolveTest = ConvolveTestBase; + +TEST_P(LowbdConvolveTest, GuardBlocks) { CheckGuardBlocks(); } + +void FiltersWontSaturateWhenAddedPairwise() { + int subpel_search; + for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS; + ++subpel_search) { + for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { + const InterpFilter filter = (InterpFilter)filter_bank; + const InterpKernel *filters = + (const InterpKernel *)av1_get_interp_filter_kernel(filter, + subpel_search); + for (int i = 0; i < kNumFilters; i++) { + const int p0 = filters[i][0] + filters[i][1]; + const int p1 = filters[i][2] + filters[i][3]; + const int p2 = filters[i][4] + filters[i][5]; + const int p3 = filters[i][6] + filters[i][7]; + EXPECT_LE(p0, 128); + EXPECT_LE(p1, 128); + EXPECT_LE(p2, 128); + EXPECT_LE(p3, 128); + EXPECT_LE(p0 + p3, 128); + EXPECT_LE(p0 + p3 + p1, 128); + EXPECT_LE(p0 + p3 + p1 + p2, 128); + EXPECT_EQ(p0 + p1 + p2 + p3, 128); + } + } + } +} + +TEST(LowbdConvolveTest, FiltersWontSaturateWhenAddedPairwise) { + FiltersWontSaturateWhenAddedPairwise(); +} + 
+TEST_P(LowbdConvolveTest, MatchesReferenceSubpixelFilter) { + MatchesReferenceSubpixelFilter(); +} + +TEST_P(LowbdConvolveTest, FilterExtremes) { FilterExtremes(); } + +TEST_P(LowbdConvolveTest, DISABLED_Speed) { SpeedTest(); } + +using std::make_tuple; + +// WRAP macro is only used for high bitdepth build. +#if CONFIG_AV1_HIGHBITDEPTH +#define WRAP(func, bd) \ + static void wrap_##func##_##bd( \ + const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ + ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, \ + const int16_t *filter_y, int filter_y_stride, int w, int h) { \ + aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x, \ + filter_x_stride, filter_y, filter_y_stride, w, h, bd); \ + } +#if HAVE_SSE2 && AOM_ARCH_X86_64 +WRAP(convolve8_horiz_sse2, 8) +WRAP(convolve8_vert_sse2, 8) +WRAP(convolve8_horiz_sse2, 10) +WRAP(convolve8_vert_sse2, 10) +WRAP(convolve8_horiz_sse2, 12) +WRAP(convolve8_vert_sse2, 12) +#endif // HAVE_SSE2 && AOM_ARCH_X86_64 + +WRAP(convolve8_horiz_c, 8) +WRAP(convolve8_vert_c, 8) +WRAP(convolve8_horiz_c, 10) +WRAP(convolve8_vert_c, 10) +WRAP(convolve8_horiz_c, 12) +WRAP(convolve8_vert_c, 12) + +#if HAVE_AVX2 +WRAP(convolve8_horiz_avx2, 8) +WRAP(convolve8_vert_avx2, 8) + +WRAP(convolve8_horiz_avx2, 10) +WRAP(convolve8_vert_avx2, 10) + +WRAP(convolve8_horiz_avx2, 12) +WRAP(convolve8_vert_avx2, 12) +#endif // HAVE_AVX2 + +#if HAVE_NEON +WRAP(convolve8_horiz_neon, 8) +WRAP(convolve8_vert_neon, 8) + +WRAP(convolve8_horiz_neon, 10) +WRAP(convolve8_vert_neon, 10) + +WRAP(convolve8_horiz_neon, 12) +WRAP(convolve8_vert_neon, 12) +#endif // HAVE_NEON +#endif // CONFIG_AV1_HIGHBITDEPTH + +#undef WRAP + +#if CONFIG_AV1_HIGHBITDEPTH + +using HighbdConvolveTest = ConvolveTestBase; + +TEST_P(HighbdConvolveTest, GuardBlocks) { CheckGuardBlocks(); } + +TEST(HighbdConvolveTest, FiltersWontSaturateWhenAddedPairwise) { + FiltersWontSaturateWhenAddedPairwise(); +} + +TEST_P(HighbdConvolveTest, MatchesReferenceSubpixelFilter) { + 
MatchesReferenceSubpixelFilter(); +} + +TEST_P(HighbdConvolveTest, FilterExtremes) { FilterExtremes(); } + +TEST_P(HighbdConvolveTest, DISABLED_Speed) { SpeedTest(); } + +const ConvolveFunctions wrap_convolve8_c(wrap_convolve8_horiz_c_8, + wrap_convolve8_vert_c_8, 8); +const ConvolveFunctions wrap_convolve10_c(wrap_convolve8_horiz_c_10, + wrap_convolve8_vert_c_10, 10); +const ConvolveFunctions wrap_convolve12_c(wrap_convolve8_horiz_c_12, + wrap_convolve8_vert_c_12, 12); +const ConvolveParam kArrayHighbdConvolve_c[] = { ALL_SIZES(wrap_convolve8_c), + ALL_SIZES(wrap_convolve10_c), + ALL_SIZES(wrap_convolve12_c) }; + +INSTANTIATE_TEST_SUITE_P(C, HighbdConvolveTest, + ::testing::ValuesIn(kArrayHighbdConvolve_c)); +#endif // CONFIG_AV1_HIGHBITDEPTH + +const ConvolveFunctions convolve8_c(aom_convolve8_horiz_c, aom_convolve8_vert_c, + 0); +const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) }; + +INSTANTIATE_TEST_SUITE_P(C, LowbdConvolveTest, + ::testing::ValuesIn(kArrayConvolve_c)); + +#if HAVE_SSE2 && AOM_ARCH_X86_64 +#if CONFIG_AV1_HIGHBITDEPTH +const ConvolveFunctions wrap_convolve8_sse2(wrap_convolve8_horiz_sse2_8, + wrap_convolve8_vert_sse2_8, 8); +const ConvolveFunctions wrap_convolve10_sse2(wrap_convolve8_horiz_sse2_10, + wrap_convolve8_vert_sse2_10, 10); +const ConvolveFunctions wrap_convolve12_sse2(wrap_convolve8_horiz_sse2_12, + wrap_convolve8_vert_sse2_12, 12); +const ConvolveParam kArrayHighbdConvolve_sse2[] = { + ALL_SIZES(wrap_convolve8_sse2), ALL_SIZES(wrap_convolve10_sse2), + ALL_SIZES(wrap_convolve12_sse2) +}; + +INSTANTIATE_TEST_SUITE_P(SSE2, HighbdConvolveTest, + ::testing::ValuesIn(kArrayHighbdConvolve_sse2)); +#endif +const ConvolveFunctions convolve8_sse2(aom_convolve8_horiz_sse2, + aom_convolve8_vert_sse2, 0); +const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) }; + +INSTANTIATE_TEST_SUITE_P(SSE2, LowbdConvolveTest, + ::testing::ValuesIn(kArrayConvolve_sse2)); +#endif + +#if HAVE_SSSE3 +const ConvolveFunctions 
convolve8_ssse3(aom_convolve8_horiz_ssse3, + aom_convolve8_vert_ssse3, 0); + +const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) }; + +INSTANTIATE_TEST_SUITE_P(SSSE3, LowbdConvolveTest, + ::testing::ValuesIn(kArrayConvolve8_ssse3)); +#endif + +#if HAVE_AVX2 +#if CONFIG_AV1_HIGHBITDEPTH +const ConvolveFunctions wrap_convolve8_avx2(wrap_convolve8_horiz_avx2_8, + wrap_convolve8_vert_avx2_8, 8); +const ConvolveFunctions wrap_convolve10_avx2(wrap_convolve8_horiz_avx2_10, + wrap_convolve8_vert_avx2_10, 10); +const ConvolveFunctions wrap_convolve12_avx2(wrap_convolve8_horiz_avx2_12, + wrap_convolve8_vert_avx2_12, 12); +const ConvolveParam kArray_HighbdConvolve8_avx2[] = { + ALL_SIZES_64(wrap_convolve8_avx2), ALL_SIZES_64(wrap_convolve10_avx2), + ALL_SIZES_64(wrap_convolve12_avx2) +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, HighbdConvolveTest, + ::testing::ValuesIn(kArray_HighbdConvolve8_avx2)); +#endif +const ConvolveFunctions convolve8_avx2(aom_convolve8_horiz_avx2, + aom_convolve8_vert_avx2, 0); +const ConvolveParam kArray_Convolve8_avx2[] = { ALL_SIZES(convolve8_avx2) }; + +INSTANTIATE_TEST_SUITE_P(AVX2, LowbdConvolveTest, + ::testing::ValuesIn(kArray_Convolve8_avx2)); +#endif // HAVE_AVX2 + +#if HAVE_NEON +#if CONFIG_AV1_HIGHBITDEPTH +const ConvolveFunctions wrap_convolve8_neon(wrap_convolve8_horiz_neon_8, + wrap_convolve8_vert_neon_8, 8); +const ConvolveFunctions wrap_convolve10_neon(wrap_convolve8_horiz_neon_10, + wrap_convolve8_vert_neon_10, 10); +const ConvolveFunctions wrap_convolve12_neon(wrap_convolve8_horiz_neon_12, + wrap_convolve8_vert_neon_12, 12); +const ConvolveParam kArray_HighbdConvolve8_neon[] = { + ALL_SIZES_64(wrap_convolve8_neon), ALL_SIZES_64(wrap_convolve10_neon), + ALL_SIZES_64(wrap_convolve12_neon) +}; + +INSTANTIATE_TEST_SUITE_P(NEON, HighbdConvolveTest, + ::testing::ValuesIn(kArray_HighbdConvolve8_neon)); +#endif +const ConvolveFunctions convolve8_neon(aom_convolve8_horiz_neon, + aom_convolve8_vert_neon, 0); +const 
ConvolveParam kArray_Convolve8_neon[] = { ALL_SIZES(convolve8_neon) }; + +INSTANTIATE_TEST_SUITE_P(NEON, LowbdConvolveTest, + ::testing::ValuesIn(kArray_Convolve8_neon)); +#endif // HAVE_NEON + +#if HAVE_NEON_DOTPROD +const ConvolveFunctions convolve8_neon_dotprod(aom_convolve8_horiz_neon_dotprod, + aom_convolve8_vert_neon_dotprod, + 0); +const ConvolveParam kArray_Convolve8_neon_dotprod[] = { ALL_SIZES( + convolve8_neon_dotprod) }; + +INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, LowbdConvolveTest, + ::testing::ValuesIn(kArray_Convolve8_neon_dotprod)); +#endif // HAVE_NEON_DOTPROD + +#if HAVE_NEON_I8MM +const ConvolveFunctions convolve8_neon_i8mm(aom_convolve8_horiz_neon_i8mm, + aom_convolve8_vert_neon_i8mm, 0); +const ConvolveParam kArray_Convolve8_neon_i8mm[] = { ALL_SIZES( + convolve8_neon_i8mm) }; + +INSTANTIATE_TEST_SUITE_P(NEON_I8MM, LowbdConvolveTest, + ::testing::ValuesIn(kArray_Convolve8_neon_i8mm)); +#endif // HAVE_NEON_I8MM + +} // namespace diff --git a/third_party/aom/test/corner_match_test.cc b/third_party/aom/test/corner_match_test.cc new file mode 100644 index 0000000000..9733732180 --- /dev/null +++ b/third_party/aom/test/corner_match_test.cc @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ +#include <memory> +#include <new> +#include <tuple> + +#include "config/aom_dsp_rtcd.h" + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/util.h" +#include "test/register_state_check.h" + +#include "aom_dsp/flow_estimation/corner_match.h" + +namespace test_libaom { + +namespace AV1CornerMatch { + +using libaom_test::ACMRandom; + +typedef double (*ComputeCrossCorrFunc)(const unsigned char *im1, int stride1, + int x1, int y1, const unsigned char *im2, + int stride2, int x2, int y2); + +using std::make_tuple; +using std::tuple; +typedef tuple<int, ComputeCrossCorrFunc> CornerMatchParam; + +class AV1CornerMatchTest : public ::testing::TestWithParam<CornerMatchParam> { + public: + ~AV1CornerMatchTest() override; + void SetUp() override; + + protected: + void RunCheckOutput(int run_times); + ComputeCrossCorrFunc target_func; + + libaom_test::ACMRandom rnd_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CornerMatchTest); + +AV1CornerMatchTest::~AV1CornerMatchTest() = default; +void AV1CornerMatchTest::SetUp() { + rnd_.Reset(ACMRandom::DeterministicSeed()); + target_func = GET_PARAM(1); +} + +void AV1CornerMatchTest::RunCheckOutput(int run_times) { + const int w = 128, h = 128; + const int num_iters = 10000; + int i, j; + aom_usec_timer ref_timer, test_timer; + + std::unique_ptr<uint8_t[]> input1(new (std::nothrow) uint8_t[w * h]); + std::unique_ptr<uint8_t[]> input2(new (std::nothrow) uint8_t[w * h]); + ASSERT_NE(input1, nullptr); + ASSERT_NE(input2, nullptr); + + // Test the two extreme cases: + // i) Random data, should have correlation close to 0 + // ii) Linearly related data + noise, should have correlation close to 1 + int mode = GET_PARAM(0); + if (mode == 0) { + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + input1[i * w + j] = rnd_.Rand8(); + input2[i * w + j] = rnd_.Rand8(); + } + } else if (mode == 1) { + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int v = 
rnd_.Rand8(); + input1[i * w + j] = v; + input2[i * w + j] = (v / 2) + (rnd_.Rand8() & 15); + } + } + + for (i = 0; i < num_iters; ++i) { + int x1 = MATCH_SZ_BY2 + rnd_.PseudoUniform(w - 2 * MATCH_SZ_BY2); + int y1 = MATCH_SZ_BY2 + rnd_.PseudoUniform(h - 2 * MATCH_SZ_BY2); + int x2 = MATCH_SZ_BY2 + rnd_.PseudoUniform(w - 2 * MATCH_SZ_BY2); + int y2 = MATCH_SZ_BY2 + rnd_.PseudoUniform(h - 2 * MATCH_SZ_BY2); + + double res_c = av1_compute_cross_correlation_c(input1.get(), w, x1, y1, + input2.get(), w, x2, y2); + double res_simd = + target_func(input1.get(), w, x1, y1, input2.get(), w, x2, y2); + + if (run_times > 1) { + aom_usec_timer_start(&ref_timer); + for (j = 0; j < run_times; j++) { + av1_compute_cross_correlation_c(input1.get(), w, x1, y1, input2.get(), + w, x2, y2); + } + aom_usec_timer_mark(&ref_timer); + const int elapsed_time_c = + static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (j = 0; j < run_times; j++) { + target_func(input1.get(), w, x1, y1, input2.get(), w, x2, y2); + } + aom_usec_timer_mark(&test_timer); + const int elapsed_time_simd = + static_cast<int>(aom_usec_timer_elapsed(&test_timer)); + + printf( + "c_time=%d \t simd_time=%d \t " + "gain=%d\n", + elapsed_time_c, elapsed_time_simd, + (elapsed_time_c / elapsed_time_simd)); + } else { + ASSERT_EQ(res_simd, res_c); + } + } +} + +TEST_P(AV1CornerMatchTest, CheckOutput) { RunCheckOutput(1); } +TEST_P(AV1CornerMatchTest, DISABLED_Speed) { RunCheckOutput(100000); } + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1CornerMatchTest, + ::testing::Values(make_tuple(0, &av1_compute_cross_correlation_sse4_1), + make_tuple(1, &av1_compute_cross_correlation_sse4_1))); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1CornerMatchTest, + ::testing::Values(make_tuple(0, &av1_compute_cross_correlation_avx2), + make_tuple(1, &av1_compute_cross_correlation_avx2))); +#endif +} // namespace AV1CornerMatch + +} // namespace test_libaom diff 
--git a/third_party/aom/test/cpu_speed_test.cc b/third_party/aom/test/cpu_speed_test.cc new file mode 100644 index 0000000000..b5f5d2974d --- /dev/null +++ b/third_party/aom/test/cpu_speed_test.cc @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" + +namespace { + +const int kMaxPSNR = 100; + +class CpuSpeedTest + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>, + public ::libaom_test::EncoderTest { + protected: + CpuSpeedTest() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + set_cpu_used_(GET_PARAM(2)), min_psnr_(kMaxPSNR), + tune_content_(AOM_CONTENT_DEFAULT) {} + ~CpuSpeedTest() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + cfg_.g_lag_in_frames = 25; + } + } + + void BeginPassHook(unsigned int /*pass*/) override { min_psnr_ = kMaxPSNR; } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AV1E_SET_TUNE_CONTENT, tune_content_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + 
encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + if (pkt->data.psnr.psnr[0] < min_psnr_) min_psnr_ = pkt->data.psnr.psnr[0]; + } + + void TestQ0(); + void TestScreencastQ0(); + void TestTuneScreen(); + void TestEncodeHighBitrate(); + void TestLowBitrate(); + + ::libaom_test::TestMode encoding_mode_; + int set_cpu_used_; + double min_psnr_; + int tune_content_; +}; + +void CpuSpeedTest::TestQ0() { + // Validate that this non multiple of 64 wide clip encodes and decodes + // without a mismatch when passing in a very low max q. This pushes + // the encoder to producing lots of big partitions which will likely + // extend into the border and test the border condition. + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 400; + cfg_.rc_max_quantizer = 0; + cfg_.rc_min_quantizer = 0; + + ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 10); + + init_flags_ = AOM_CODEC_USE_PSNR; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + EXPECT_GE(min_psnr_, kMaxPSNR); +} + +void CpuSpeedTest::TestScreencastQ0() { + ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 3); + cfg_.g_timebase = video.timebase(); + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 400; + cfg_.rc_max_quantizer = 0; + cfg_.rc_min_quantizer = 0; + + init_flags_ = AOM_CODEC_USE_PSNR; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + EXPECT_GE(min_psnr_, kMaxPSNR); +} + +void CpuSpeedTest::TestTuneScreen() { + ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 3); + cfg_.g_timebase = video.timebase(); + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_minsection_pct = 2000; + cfg_.rc_target_bitrate = 2000; + cfg_.rc_max_quantizer = 63; + cfg_.rc_min_quantizer = 0; + tune_content_ = AOM_CONTENT_SCREEN; + + init_flags_ = 
AOM_CODEC_USE_PSNR; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +void CpuSpeedTest::TestEncodeHighBitrate() { + // Validate that this non multiple of 64 wide clip encodes and decodes + // without a mismatch when passing in a very low max q. This pushes + // the encoder to producing lots of big partitions which will likely + // extend into the border and test the border condition. + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 12000; + cfg_.rc_max_quantizer = 10; + cfg_.rc_min_quantizer = 0; + + ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 10); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +void CpuSpeedTest::TestLowBitrate() { + // Validate that this clip encodes and decodes without a mismatch + // when passing in a very high min q. This pushes the encoder to producing + // lots of small partitions which might will test the other condition. + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_target_bitrate = 200; + cfg_.rc_min_quantizer = 40; + + ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 10); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +TEST_P(CpuSpeedTest, TestQ0) { TestQ0(); } +TEST_P(CpuSpeedTest, TestScreencastQ0) { TestScreencastQ0(); } +TEST_P(CpuSpeedTest, TestTuneScreen) { TestTuneScreen(); } +TEST_P(CpuSpeedTest, TestEncodeHighBitrate) { TestEncodeHighBitrate(); } +TEST_P(CpuSpeedTest, TestLowBitrate) { TestLowBitrate(); } + +class CpuSpeedTestLarge : public CpuSpeedTest {}; + +TEST_P(CpuSpeedTestLarge, TestQ0) { TestQ0(); } +TEST_P(CpuSpeedTestLarge, TestScreencastQ0) { TestScreencastQ0(); } +TEST_P(CpuSpeedTestLarge, TestTuneScreen) { TestTuneScreen(); } +TEST_P(CpuSpeedTestLarge, TestEncodeHighBitrate) { TestEncodeHighBitrate(); } +TEST_P(CpuSpeedTestLarge, TestLowBitrate) { TestLowBitrate(); } + +AV1_INSTANTIATE_TEST_SUITE(CpuSpeedTest, + 
::testing::Values(::libaom_test::kTwoPassGood, + ::libaom_test::kOnePassGood), + ::testing::Range(1, 3)); +AV1_INSTANTIATE_TEST_SUITE(CpuSpeedTestLarge, + ::testing::Values(::libaom_test::kTwoPassGood, + ::libaom_test::kOnePassGood), + ::testing::Range(0, 1)); +} // namespace diff --git a/third_party/aom/test/cpu_used_firstpass_test.cc b/third_party/aom/test/cpu_used_firstpass_test.cc new file mode 100644 index 0000000000..53db8b0d13 --- /dev/null +++ b/third_party/aom/test/cpu_used_firstpass_test.cc @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <cstdlib> + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +const double kPsnrDiffThreshold = 0.1; + +// Params: first pass cpu used, second pass cpu used +class CpuUsedFirstpassTest + : public ::libaom_test::CodecTestWith2Params<int, int>, + public ::libaom_test::EncoderTest { + protected: + CpuUsedFirstpassTest() + : EncoderTest(GET_PARAM(0)), second_pass_cpu_used_(GET_PARAM(2)) {} + ~CpuUsedFirstpassTest() override = default; + + void SetUp() override { + InitializeConfig(::libaom_test::kTwoPassGood); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = AOM_VBR; + cfg_.rc_target_bitrate = 1000; + cfg_.g_lag_in_frames = 19; + cfg_.g_threads = 0; + init_flags_ = AOM_CODEC_USE_PSNR; + } + + void BeginPassHook(unsigned int pass) override { + psnr_ = 0.0; + nframes_ = 0; + + if (pass == 0) + cpu_used_ = first_pass_cpu_used_; + else + cpu_used_ = second_pass_cpu_used_; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + double GetPsnrDiffThreshold() { return kPsnrDiffThreshold; } + + void DoTest() { + libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, + cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, 30); + double ref_psnr; + double psnr_diff; + + first_pass_cpu_used_ = second_pass_cpu_used_; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // same preset case ref_psnr + ref_psnr = GetAveragePsnr(); 
+ + first_pass_cpu_used_ = GET_PARAM(1); + if (first_pass_cpu_used_ == second_pass_cpu_used_) return; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + psnr_diff = std::abs(ref_psnr - GetAveragePsnr()); + EXPECT_LT(psnr_diff, GetPsnrDiffThreshold()) + << "first pass cpu used = " << first_pass_cpu_used_ + << ", second pass cpu used = " << second_pass_cpu_used_; + } + + int cpu_used_; + int first_pass_cpu_used_; + int second_pass_cpu_used_; + unsigned int nframes_; + double psnr_; +}; + +TEST_P(CpuUsedFirstpassTest, FirstPassTest) { DoTest(); } + +class CpuUsedFirstpassTestLarge : public CpuUsedFirstpassTest {}; + +TEST_P(CpuUsedFirstpassTestLarge, FirstPassTest) { DoTest(); } + +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +static const int kSecondPassCpuUsedLarge[] = { 2, 4 }; +static const int kSecondPassCpuUsed[] = { 6 }; +#else +static const int kSecondPassCpuUsedLarge[] = { 2 }; +static const int kSecondPassCpuUsed[] = { 4, 6 }; +#endif +#else +static const int kSecondPassCpuUsedLarge[] = { 2 }; +static const int kSecondPassCpuUsed[] = { 4, 6 }; +#endif + +AV1_INSTANTIATE_TEST_SUITE( + CpuUsedFirstpassTestLarge, ::testing::Values(2, 4, 6), + ::testing::ValuesIn(kSecondPassCpuUsedLarge)); // cpu_used + +AV1_INSTANTIATE_TEST_SUITE( + CpuUsedFirstpassTest, ::testing::Values(2, 4, 6), + ::testing::ValuesIn(kSecondPassCpuUsed)); // cpu_used + +} // namespace diff --git a/third_party/aom/test/datarate_test.cc b/third_party/aom/test/datarate_test.cc new file mode 100644 index 0000000000..a75a72fab6 --- /dev/null +++ b/third_party/aom/test/datarate_test.cc @@ -0,0 +1,712 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "config/aom_config.h" + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/codec_factory.h" +#include "test/datarate_test.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "aom/aom_codec.h" + +namespace datarate_test { +namespace { + +// Params: test mode, speed, aq mode and index for bitrate array. +class DatarateTestLarge + : public ::libaom_test::CodecTestWith4Params<libaom_test::TestMode, int, + unsigned int, int>, + public DatarateTest { + public: + DatarateTestLarge() : DatarateTest(GET_PARAM(0)) { + set_cpu_used_ = GET_PARAM(2); + aq_mode_ = GET_PARAM(3); + } + + protected: + ~DatarateTestLarge() override = default; + + void SetUp() override { + InitializeConfig(GET_PARAM(1)); + ResetModel(); + } + + virtual void BasicRateTargetingVBRTest() { + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.g_error_resilient = 0; + cfg_.rc_end_usage = AOM_VBR; + cfg_.g_lag_in_frames = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 140); + const int bitrate_array[2] = { 400, 800 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.7) + << " The datarate for the file is lower than target by too much!"; + // FIXME(jingning): Lower this test threshold after vbr mode can render + // sufficiently accurate bit rate. 
+ ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.45) + << " The datarate for the file is greater than target by too much!"; + } + + virtual void BasicRateTargetingCBRTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 140); + const int bitrate_array[2] = { 150, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.19) + << " The datarate for the file is greater than target by too much!"; + } + + virtual void BasicRateTargetingCBRSpikeTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.kf_max_dist = 3000; + cfg_.kf_min_dist = 3000; + + ::libaom_test::I420VideoSource video("desktopqvga2.320_240.yuv", 320, 240, + 30, 1, 0, 800); + const int bitrate_array[2] = { 100, 200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + max_perc_spike_ = 3.0; + max_perc_spike_high_ = 8.0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.19) + << " The datarate for the file is greater than target by too much!"; + ASSERT_LE(num_spikes_, 8); + ASSERT_LT(num_spikes_high_, 1); + } + + virtual void 
BasicRateTargetingCBRDynamicBitrateTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.kf_max_dist = 3000; + cfg_.kf_min_dist = 3000; + + ::libaom_test::I420VideoSource video("desktop1.320_180.yuv", 320, 180, 30, + 1, 0, 800); + const int bitrate_array[2] = { 100, 200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + target_bitrate_update_[0] = cfg_.rc_target_bitrate; + target_bitrate_update_[1] = static_cast<int>(1.3 * cfg_.rc_target_bitrate); + target_bitrate_update_[2] = static_cast<int>(0.7 * cfg_.rc_target_bitrate); + frame_update_bitrate_ = 250; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < 3; i++) { + ASSERT_GE(effective_datarate_dynamic_[i], + target_bitrate_update_[i] * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_dynamic_[i], + target_bitrate_update_[i] * 1.20) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingMultiThreadCBRTest() { + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, 400); + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_threads = 4; + + const int bitrate_array[2] = { 250, 650 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + tile_column_ = 2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate), + effective_datarate_ * 0.85) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate), + effective_datarate_ 
* 1.15) + << " The datarate for the file missed the target!" + << cfg_.rc_target_bitrate << " " << effective_datarate_; + } + + virtual void ErrorResilienceOnSceneCuts() { + if (GET_PARAM(4) > 0) return; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.g_error_resilient = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + cfg_.rc_target_bitrate = 500; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; + } + + virtual void BasicRateTargetingCBRPeriodicKeyFrameTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + // Periodic keyframe + cfg_.kf_max_dist = 50; + + ::libaom_test::I420VideoSource video("pixel_capture_w320h240.yuv", 320, 240, + 30, 1, 0, 310); + const int bitrate_array[2] = { 150, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; + } + + virtual void CBRPeriodicKeyFrameOnSceneCuts() { + if (GET_PARAM(4) > 0) return; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + 
cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + // Periodic keyframe + cfg_.kf_max_dist = 30; + cfg_.kf_min_dist = 30; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + cfg_.rc_target_bitrate = 500; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.3) + << " The datarate for the file is greater than target by too much!"; + } + + virtual void BasicRateTargetingAQModeOnOffCBRTest() { + if (GET_PARAM(4) > 0) return; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + cfg_.g_pass = AOM_RC_ONE_PASS; + cfg_.g_usage = AOM_USAGE_REALTIME; + cfg_.kf_mode = AOM_KF_DISABLED; + + ::libaom_test::I420VideoSource video("pixel_capture_w320h240.yuv", 320, 240, + 30, 1, 0, 310); + cfg_.rc_target_bitrate = 60; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; + } + + virtual void BasicRateTargeting444CBRScreenTest() { + ::libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140); + + cfg_.g_profile = 1; + cfg_.g_timebase = video.timebase(); + + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = 
AOM_CBR; + + const int bitrate_array[2] = { 250, 650 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + screen_mode_ = true; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate), + effective_datarate_ * 0.85) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate), + effective_datarate_ * 1.15) + << " The datarate for the file missed the target!" + << cfg_.rc_target_bitrate << " " << effective_datarate_; + } + + virtual void BasicRateTargetingSuperresCBR() { + ::libaom_test::I420VideoSource video("desktopqvga2.320_240.yuv", 320, 240, + 30, 1, 0, 800); + + cfg_.g_profile = 0; + cfg_.g_timebase = video.timebase(); + + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + + cfg_.rc_superres_mode = AOM_SUPERRES_FIXED; + cfg_.rc_superres_denominator = 16; + cfg_.rc_superres_kf_denominator = 16; + + const int bitrate_array[2] = { 250, 650 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate), + effective_datarate_ * 0.85) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate), + effective_datarate_ * 1.15) + << " The datarate for the file missed the target!" 
+ << cfg_.rc_target_bitrate << " " << effective_datarate_; + } + + virtual void BasicRateTargetingSuperresCBRMultiThreads() { + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, 400); + + cfg_.g_profile = 0; + cfg_.g_timebase = video.timebase(); + + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_threads = 2; + + cfg_.rc_superres_mode = AOM_SUPERRES_FIXED; + cfg_.rc_superres_denominator = 16; + cfg_.rc_superres_kf_denominator = 16; + + const int bitrate_array[2] = { 250, 650 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + tile_column_ = 1; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate), + effective_datarate_ * 0.85) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate), + effective_datarate_ * 1.15) + << " The datarate for the file missed the target!" + << cfg_.rc_target_bitrate << " " << effective_datarate_; + } +}; + +// Params: test mode, speed, aq mode. 
+class DatarateTestFrameDropLarge + : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int, + unsigned int>, + public DatarateTest { + public: + DatarateTestFrameDropLarge() : DatarateTest(GET_PARAM(0)) { + set_cpu_used_ = GET_PARAM(2); + aq_mode_ = GET_PARAM(3); + } + + protected: + ~DatarateTestFrameDropLarge() override = default; + + void SetUp() override { + InitializeConfig(GET_PARAM(1)); + ResetModel(); + } + + virtual void ChangingDropFrameThreshTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_undershoot_pct = 20; + cfg_.rc_undershoot_pct = 20; + cfg_.rc_dropframe_thresh = 10; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 50; + cfg_.rc_end_usage = AOM_CBR; + cfg_.rc_target_bitrate = 200; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 1; + // TODO(marpan): Investigate datarate target failures with a smaller + // keyframe interval (128). + cfg_.kf_max_dist = 9999; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 100); + + const int kDropFrameThreshTestStep = 30; + aom_codec_pts_t last_drop = 140; + int last_num_drops = 0; + for (int i = 40; i < 100; i += kDropFrameThreshTestStep) { + cfg_.rc_dropframe_thresh = i; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.40) + << " The datarate for the file is greater than target by too much!"; + if (last_drop > 0) { + ASSERT_LE(first_drop_, last_drop) + << " The first dropped frame for drop_thresh " << i + << " > first dropped frame for drop_thresh " + << i - kDropFrameThreshTestStep; + } + ASSERT_GE(num_drops_, last_num_drops * 0.7) + << " The number of dropped frames for drop_thresh " << i + << " < number of dropped frames for drop_thresh " + << i - kDropFrameThreshTestStep; + 
last_drop = first_drop_; + last_num_drops = num_drops_; + } + } +}; + +// Check basic rate targeting for VBR mode. +TEST_P(DatarateTestLarge, BasicRateTargetingVBR) { + BasicRateTargetingVBRTest(); +} + +// Check basic rate targeting for CBR. +TEST_P(DatarateTestLarge, BasicRateTargetingCBR) { + BasicRateTargetingCBRTest(); +} + +// Check basic rate targeting for CBR, with 4 threads +TEST_P(DatarateTestLarge, BasicRateTargetingMultiThreadCBR) { + BasicRateTargetingMultiThreadCBRTest(); +} + +// Check basic rate targeting for periodic key frame. +TEST_P(DatarateTestLarge, PeriodicKeyFrameCBR) { + BasicRateTargetingCBRPeriodicKeyFrameTest(); +} + +// Check basic rate targeting for periodic key frame, aligned with scene change. +TEST_P(DatarateTestLarge, PeriodicKeyFrameCBROnSceneCuts) { + CBRPeriodicKeyFrameOnSceneCuts(); +} + +// Check basic rate targeting with error resilience on for scene cuts. +TEST_P(DatarateTestLarge, ErrorResilienceOnSceneCuts) { + ErrorResilienceOnSceneCuts(); +} + +// Check basic rate targeting for CBR, for 444 input screen mode. +#if defined(CONFIG_MAX_DECODE_PROFILE) && CONFIG_MAX_DECODE_PROFILE < 1 +TEST_P(DatarateTestLarge, DISABLED_BasicRateTargeting444CBRScreen) { +#else +TEST_P(DatarateTestLarge, BasicRateTargeting444CBRScreen) { +#endif + BasicRateTargeting444CBRScreenTest(); +} + +// Check basic rate targeting for Superres mode with CBR. +TEST_P(DatarateTestLarge, BasicRateTargetingSuperresCBR) { + BasicRateTargetingSuperresCBR(); +} + +// Check basic rate targeting for Superres mode with CBR and multi-threads. +TEST_P(DatarateTestLarge, BasicRateTargetingSuperresCBRMultiThreads) { + BasicRateTargetingSuperresCBRMultiThreads(); +} + +// Check that (1) the first dropped frame gets earlier and earlier +// as the drop frame threshold is increased, and (2) that the total number of +// frame drops does not decrease as we increase frame drop threshold. +// Use a lower qp-max to force some frame drops. 
+TEST_P(DatarateTestFrameDropLarge, ChangingDropFrameThresh) { + ChangingDropFrameThreshTest(); +} + +TEST_P(DatarateTestLarge, BasicRateTargetingAQModeOnOffCBR) { + BasicRateTargetingAQModeOnOffCBRTest(); +} + +class DatarateTestRealtime : public DatarateTestLarge {}; + +class DatarateTestFrameDropRealtime : public DatarateTestFrameDropLarge {}; + +// Params: aq mode. +class DatarateTestSpeedChangeRealtime + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, + unsigned int>, + public DatarateTest { + public: + DatarateTestSpeedChangeRealtime() : DatarateTest(GET_PARAM(0)) { + aq_mode_ = GET_PARAM(1); + speed_change_test_ = true; + } + + protected: + ~DatarateTestSpeedChangeRealtime() override = default; + + void SetUp() override { + InitializeConfig(GET_PARAM(1)); + ResetModel(); + } + + virtual void ChangingSpeedTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_undershoot_pct = 20; + cfg_.rc_undershoot_pct = 20; + cfg_.rc_dropframe_thresh = 10; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 50; + cfg_.rc_end_usage = AOM_CBR; + cfg_.rc_target_bitrate = 200; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 1; + // TODO(marpan): Investigate datarate target failures with a smaller + // keyframe interval (128). + cfg_.kf_max_dist = 9999; + cfg_.rc_dropframe_thresh = 0; + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 100); + + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.83) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.35) + << " The datarate for the file is greater than target by too much!"; + } +}; + +// Check basic rate targeting for VBR mode. +TEST_P(DatarateTestRealtime, BasicRateTargetingVBR) { + BasicRateTargetingVBRTest(); +} + +// Check basic rate targeting for CBR. 
+TEST_P(DatarateTestRealtime, BasicRateTargetingCBR) { + BasicRateTargetingCBRTest(); +} + +// Check basic rate targeting for CBR. Use a longer clip, +// and verify #encode size spikes above threshold. +TEST_P(DatarateTestRealtime, BasicRateTargetingCBRSpike) { + BasicRateTargetingCBRSpikeTest(); +} + +// Check basic rate targeting for CBR. Use a longer clip, +// and verify encoder can respnd and hit new bitrates updated +// within the stream. +TEST_P(DatarateTestRealtime, BasicRateTargetingCBRDynamicBitrate) { + BasicRateTargetingCBRDynamicBitrateTest(); +} + +// Check basic rate targeting for CBR, with 4 threads +TEST_P(DatarateTestRealtime, BasicRateTargetingMultiThreadCBR) { + BasicRateTargetingMultiThreadCBRTest(); +} + +// Check basic rate targeting for periodic key frame. +TEST_P(DatarateTestRealtime, PeriodicKeyFrameCBR) { + BasicRateTargetingCBRPeriodicKeyFrameTest(); +} + +// Check basic rate targeting for periodic key frame, aligned with scene change. +TEST_P(DatarateTestRealtime, PeriodicKeyFrameCBROnSceneCuts) { + CBRPeriodicKeyFrameOnSceneCuts(); +} + +// Check basic rate targeting with error resilience on for scene cuts. +TEST_P(DatarateTestRealtime, ErrorResilienceOnSceneCuts) { + ErrorResilienceOnSceneCuts(); +} + +// Check basic rate targeting for CBR for 444 screen mode. +#if defined(CONFIG_MAX_DECODE_PROFILE) && CONFIG_MAX_DECODE_PROFILE < 1 +TEST_P(DatarateTestRealtime, DISABLED_BasicRateTargeting444CBRScreen) { +#else +TEST_P(DatarateTestRealtime, BasicRateTargeting444CBRScreen) { +#endif + BasicRateTargeting444CBRScreenTest(); +} + +// Check basic rate targeting for Superres mode with CBR. +TEST_P(DatarateTestRealtime, BasicRateTargetingSuperresCBR) { + BasicRateTargetingSuperresCBR(); +} + +// Check basic rate targeting for Superres mode with CBR and multi-threads. 
+TEST_P(DatarateTestRealtime, BasicRateTargetingSuperresCBRMultiThreads) { + BasicRateTargetingSuperresCBRMultiThreads(); +} + +// Check that (1) the first dropped frame gets earlier and earlier +// as the drop frame threshold is increased, and (2) that the total number of +// frame drops does not decrease as we increase frame drop threshold. +// Use a lower qp-max to force some frame drops. +TEST_P(DatarateTestFrameDropRealtime, ChangingDropFrameThresh) { + ChangingDropFrameThreshTest(); +} + +TEST_P(DatarateTestSpeedChangeRealtime, ChangingSpeedTest) { + ChangingSpeedTest(); +} + +class DatarateTestSetFrameQpRealtime + : public DatarateTest, + public ::testing::TestWithParam<const libaom_test::AV1CodecFactory *> { + public: + DatarateTestSetFrameQpRealtime() : DatarateTest(GetParam()), frame_(0) {} + + protected: + ~DatarateTestSetFrameQpRealtime() override = default; + + void SetUp() override { + InitializeConfig(libaom_test::kRealTime); + ResetModel(); + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + set_cpu_used_ = 7; + DatarateTest::PreEncodeFrameHook(video, encoder); + frame_qp_ = rnd_.PseudoUniform(63); + encoder->Control(AV1E_SET_QUANTIZER_ONE_PASS, frame_qp_); + frame_++; + } + + void PostEncodeFrameHook(::libaom_test::Encoder *encoder) override { + if (frame_ >= total_frames_) return; + int qp = 0; + encoder->Control(AOME_GET_LAST_QUANTIZER_64, &qp); + ASSERT_EQ(qp, frame_qp_); + } + + protected: + int total_frames_; + + private: + int frame_qp_; + int frame_; + libaom_test::ACMRandom rnd_; +}; + +TEST_P(DatarateTestSetFrameQpRealtime, SetFrameQpOnePass) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_undershoot_pct = 20; + cfg_.rc_undershoot_pct = 20; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 50; + cfg_.rc_end_usage = AOM_CBR; + cfg_.rc_target_bitrate = 200; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 1; + 
cfg_.kf_max_dist = 9999; + cfg_.rc_dropframe_thresh = 0; + + total_frames_ = 100; + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 100); + + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +AV1_INSTANTIATE_TEST_SUITE(DatarateTestLarge, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Range(5, 7), ::testing::Values(0, 3), + ::testing::Values(0, 1)); + +AV1_INSTANTIATE_TEST_SUITE(DatarateTestFrameDropLarge, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Range(5, 7), ::testing::Values(0, 3)); + +AV1_INSTANTIATE_TEST_SUITE(DatarateTestRealtime, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Range(7, 12), ::testing::Values(0, 3), + ::testing::Values(0, 1)); + +AV1_INSTANTIATE_TEST_SUITE(DatarateTestFrameDropRealtime, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Range(7, 12), ::testing::Values(0, 3)); + +AV1_INSTANTIATE_TEST_SUITE(DatarateTestSpeedChangeRealtime, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Values(0, 3)); + +INSTANTIATE_TEST_SUITE_P( + AV1, DatarateTestSetFrameQpRealtime, + ::testing::Values( + static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1))); + +} // namespace +} // namespace datarate_test diff --git a/third_party/aom/test/datarate_test.h b/third_party/aom/test/datarate_test.h new file mode 100644 index 0000000000..accc1ad86b --- /dev/null +++ b/third_party/aom/test/datarate_test.h @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "config/aom_config.h" + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "aom/aom_codec.h" + +namespace datarate_test { +namespace { +class DatarateTest : public ::libaom_test::EncoderTest { + public: + explicit DatarateTest(const ::libaom_test::CodecFactory *codec) + : EncoderTest(codec), set_cpu_used_(0), aq_mode_(0), + speed_change_test_(false) {} + + protected: + ~DatarateTest() override = default; + + virtual void ResetModel() { + last_pts_ = 0; + bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; + frame_number_ = 0; + tot_frame_number_ = 0; + first_drop_ = 0; + num_drops_ = 0; + // Denoiser is off by default. 
+ denoiser_on_ = 0; + bits_total_ = 0; + denoiser_offon_test_ = 0; + denoiser_offon_period_ = -1; + tile_column_ = 0; + screen_mode_ = false; + max_perc_spike_ = 1.0; + max_perc_spike_high_ = 1.0; + num_spikes_ = 0; + num_spikes_high_ = 0; + frame_update_bitrate_ = 0; + for (int i = 0; i < 3; i++) { + target_bitrate_update_[i] = 0; + frame_number_dynamic_[i] = 0; + bits_total_dynamic_[i] = 0; + effective_datarate_dynamic_[i] = 0.0; + } + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AV1E_SET_AQ_MODE, aq_mode_); + encoder->Control(AV1E_SET_TILE_COLUMNS, tile_column_); + encoder->Control(AV1E_SET_ROW_MT, 1); + if (cfg_.g_usage == AOM_USAGE_REALTIME) { + encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_WARPED_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_RESTORATION, 0); + encoder->Control(AV1E_SET_ENABLE_OBMC, 0); + encoder->Control(AV1E_SET_DELTAQ_MODE, 0); + encoder->Control(AV1E_SET_ENABLE_TPL_MODEL, 0); + encoder->Control(AV1E_SET_ENABLE_CDEF, 1); + encoder->Control(AV1E_SET_COEFF_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_MODE_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_MV_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_DV_COST_UPD_FREQ, 2); + } + if (screen_mode_) { + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN); + encoder->Control(AV1E_SET_ENABLE_PALETTE, 1); + encoder->Control(AV1E_SET_ENABLE_INTRABC, 0); + } + } + + if (speed_change_test_) { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 8); + } else if (video->frame() == 30) { + encoder->Control(AOME_SET_CPUUSED, 7); + } else if (video->frame() == 60) { + encoder->Control(AOME_SET_CPUUSED, 6); + } else if (video->frame() == 90) { + encoder->Control(AOME_SET_CPUUSED, 7); + } + } + + if (frame_update_bitrate_ > 0) { + if (frame_number_ == frame_update_bitrate_) { + 
cfg_.rc_target_bitrate = target_bitrate_update_[1]; + encoder->Config(&cfg_); + } else if (frame_number_ == 2 * frame_update_bitrate_) { + cfg_.rc_target_bitrate = target_bitrate_update_[2]; + encoder->Config(&cfg_); + } + } + + if (denoiser_offon_test_) { + ASSERT_GT(denoiser_offon_period_, 0) + << "denoiser_offon_period_ is not positive."; + if ((video->frame() + 1) % denoiser_offon_period_ == 0) { + // Flip denoiser_on_ periodically + denoiser_on_ ^= 1; + } + } + + encoder->Control(AV1E_SET_NOISE_SENSITIVITY, denoiser_on_); + + const aom_rational_t tb = video->timebase(); + timebase_ = static_cast<double>(tb.num) / tb.den; + duration_ = 0; + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + // Time since last timestamp = duration. + aom_codec_pts_t duration = pkt->data.frame.pts - last_pts_; + + if (duration > 1) { + // If first drop not set and we have a drop set it to this time. + if (!first_drop_) first_drop_ = last_pts_ + 1; + // Update the number of frame drops. + num_drops_ += static_cast<int>(duration - 1); + // Update counter for total number of frames (#frames input to encoder). + // Needed for setting the proper layer_id below. + tot_frame_number_ += static_cast<int>(duration - 1); + } + + // Add to the buffer the bits we'd expect from a constant bitrate server. + bits_in_buffer_model_ += static_cast<int64_t>( + duration * timebase_ * cfg_.rc_target_bitrate * 1000); + + // Buffer should not go negative. + ASSERT_GE(bits_in_buffer_model_, 0) + << "Buffer Underrun at frame " << pkt->data.frame.pts; + + const size_t frame_size_in_bits = pkt->data.frame.sz * 8; + + // Update the total encoded bits. + bits_total_ += frame_size_in_bits; + + // Update the most recent pts. 
+ last_pts_ = pkt->data.frame.pts; + ++frame_number_; + ++tot_frame_number_; + const int per_frame_bandwidth = (cfg_.rc_target_bitrate * 1000) / 30; + if (frame_size_in_bits > max_perc_spike_ * per_frame_bandwidth && + frame_number_ > 1) + num_spikes_++; + if (frame_size_in_bits > max_perc_spike_high_ * per_frame_bandwidth && + frame_number_ > 1) + num_spikes_high_++; + + if (frame_update_bitrate_ > 0) { + if (frame_number_ < frame_update_bitrate_) { + bits_total_dynamic_[0] += frame_size_in_bits; + frame_number_dynamic_[0]++; + } else if (frame_number_ >= frame_update_bitrate_ && + frame_number_ < 2 * frame_update_bitrate_) { + bits_total_dynamic_[1] += frame_size_in_bits; + frame_number_dynamic_[1]++; + } else { + bits_total_dynamic_[2] += frame_size_in_bits; + frame_number_dynamic_[2]++; + } + } + } + + void EndPassHook() override { + duration_ = (last_pts_ + 1) * timebase_; + // Effective file datarate: + effective_datarate_ = (bits_total_ / 1000.0) / duration_; + if (frame_update_bitrate_ > 0) { + for (int i = 0; i < 3; i++) + effective_datarate_dynamic_[i] = + 30 * (bits_total_dynamic_[i] / 1000.0) / frame_number_dynamic_[i]; + } + } + + aom_codec_pts_t last_pts_; + double timebase_; + int frame_number_; // Counter for number of non-dropped/encoded frames. + int tot_frame_number_; // Counter for total number of input frames. + int64_t bits_total_; + double duration_; + double effective_datarate_; + int set_cpu_used_; + int64_t bits_in_buffer_model_; + aom_codec_pts_t first_drop_; + int num_drops_; + int denoiser_on_; + int denoiser_offon_test_; + int denoiser_offon_period_; + unsigned int aq_mode_; + bool speed_change_test_; + int tile_column_; + bool screen_mode_; + double max_perc_spike_; + double max_perc_spike_high_; + int num_spikes_; + int num_spikes_high_; + // These are use for test with dynamic bitrate change. + // Used to verify that the encoder can respond and hit bitrate that is updated + // during the sequence. 
+ int frame_update_bitrate_; + int target_bitrate_update_[3]; + double effective_datarate_dynamic_[3]; + int64_t bits_total_dynamic_[3]; + int frame_number_dynamic_[3]; +}; + +} // namespace +} // namespace datarate_test diff --git a/third_party/aom/test/decode_api_test.cc b/third_party/aom/test/decode_api_test.cc new file mode 100644 index 0000000000..591a167e94 --- /dev/null +++ b/third_party/aom/test/decode_api_test.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" + +#include "aom/aomdx.h" +#include "aom/aom_decoder.h" + +namespace { + +TEST(DecodeAPI, InvalidParams) { + uint8_t buf[1] = { 0 }; + aom_codec_ctx_t dec; + + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_dec_init(nullptr, nullptr, nullptr, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_dec_init(&dec, nullptr, nullptr, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_decode(nullptr, nullptr, 0, nullptr)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_decode(nullptr, buf, 0, nullptr)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_decode(nullptr, buf, sizeof(buf), nullptr)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_decode(nullptr, nullptr, sizeof(buf), nullptr)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(nullptr)); + EXPECT_NE(aom_codec_error(nullptr), nullptr); + EXPECT_EQ(aom_codec_error_detail(nullptr), nullptr); + + aom_codec_iface_t *iface = 
aom_codec_av1_dx(); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_dec_init(nullptr, iface, nullptr, 0)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_dec_init(&dec, iface, nullptr, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_decode(&dec, nullptr, sizeof(buf), nullptr)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(&dec, buf, 0, nullptr)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&dec)); +} + +TEST(DecodeAPI, InvalidControlId) { + aom_codec_iface_t *iface = aom_codec_av1_dx(); + aom_codec_ctx_t dec; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_dec_init(&dec, iface, nullptr, 0)); + EXPECT_EQ(AOM_CODEC_ERROR, aom_codec_control(&dec, -1, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_control(&dec, 0, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&dec)); +} + +} // namespace diff --git a/third_party/aom/test/decode_multithreaded_test.cc b/third_party/aom/test/decode_multithreaded_test.cc new file mode 100644 index 0000000000..4e06f1afac --- /dev/null +++ b/third_party/aom/test/decode_multithreaded_test.cc @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <cstdio> +#include <cstdlib> +#include <string> + +#include "aom_mem/aom_mem.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/md5_helper.h" +#include "test/util.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +static const int kNumMultiThreadDecoders = 3; + +class AV1DecodeMultiThreadedTest + : public ::libaom_test::CodecTestWith5Params<int, int, int, int, int>, + public ::libaom_test::EncoderTest { + protected: + AV1DecodeMultiThreadedTest() + : EncoderTest(GET_PARAM(0)), md5_single_thread_(), md5_multi_thread_(), + n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)), + n_tile_groups_(GET_PARAM(3)), set_cpu_used_(GET_PARAM(4)), + row_mt_(GET_PARAM(5)) { + init_flags_ = AOM_CODEC_USE_PSNR; + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.w = 704; + cfg.h = 576; + cfg.threads = 1; + cfg.allow_lowbitdepth = 1; + single_thread_dec_ = codec_->CreateDecoder(cfg, 0); + + // Test cfg.threads == powers of 2. 
+ for (int i = 0; i < kNumMultiThreadDecoders; ++i) { + cfg.threads <<= 1; + multi_thread_dec_[i] = codec_->CreateDecoder(cfg, 0); + multi_thread_dec_[i]->Control(AV1D_SET_ROW_MT, row_mt_); + } + + if (single_thread_dec_->IsAV1()) { + single_thread_dec_->Control(AV1D_EXT_TILE_DEBUG, 1); + single_thread_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1); + single_thread_dec_->Control(AV1_SET_DECODE_TILE_COL, -1); + } + for (int i = 0; i < kNumMultiThreadDecoders; ++i) { + if (multi_thread_dec_[i]->IsAV1()) { + multi_thread_dec_[i]->Control(AV1D_EXT_TILE_DEBUG, 1); + multi_thread_dec_[i]->Control(AV1_SET_DECODE_TILE_ROW, -1); + multi_thread_dec_[i]->Control(AV1_SET_DECODE_TILE_COL, -1); + } + } + } + + ~AV1DecodeMultiThreadedTest() override { + delete single_thread_dec_; + for (int i = 0; i < kNumMultiThreadDecoders; ++i) + delete multi_thread_dec_[i]; + } + + void SetUp() override { InitializeConfig(libaom_test::kTwoPassGood); } + + void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_); + encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_); + encoder->Control(AV1E_SET_NUM_TG, n_tile_groups_); + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + } + } + + void UpdateMD5(::libaom_test::Decoder *dec, const aom_codec_cx_pkt_t *pkt, + ::libaom_test::MD5 *md5) { + const aom_codec_err_t res = dec->DecodeFrame( + reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz); + if (res != AOM_CODEC_OK) { + abort_ = true; + ASSERT_EQ(AOM_CODEC_OK, res); + } + const aom_image_t *img = dec->GetDxData().Next(); + md5->Add(img); + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + UpdateMD5(single_thread_dec_, pkt, &md5_single_thread_); + + for (int i = 0; i < kNumMultiThreadDecoders; ++i) + UpdateMD5(multi_thread_dec_[i], pkt, &md5_multi_thread_[i]); + } + + void DoTest() { + const aom_rational timebase = { 33333333, 1000000000 }; 
+ cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 500; + cfg_.g_lag_in_frames = 12; + cfg_.rc_end_usage = AOM_VBR; + + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 576, + timebase.den, timebase.num, 0, 2); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + const char *md5_single_thread_str = md5_single_thread_.Get(); + + for (int i = 0; i < kNumMultiThreadDecoders; ++i) { + const char *md5_multi_thread_str = md5_multi_thread_[i].Get(); + ASSERT_STREQ(md5_single_thread_str, md5_multi_thread_str); + } + } + + ::libaom_test::MD5 md5_single_thread_; + ::libaom_test::MD5 md5_multi_thread_[kNumMultiThreadDecoders]; + ::libaom_test::Decoder *single_thread_dec_; + ::libaom_test::Decoder *multi_thread_dec_[kNumMultiThreadDecoders]; + + private: + int n_tile_cols_; + int n_tile_rows_; + int n_tile_groups_; + int set_cpu_used_; + int row_mt_; +}; + +// run an encode and do the decode both in single thread +// and multi thread. Ensure that the MD5 of the output in both cases +// is identical. If so, the test passes. +TEST_P(AV1DecodeMultiThreadedTest, MD5Match) { + cfg_.large_scale_tile = 0; + single_thread_dec_->Control(AV1_SET_TILE_MODE, 0); + for (int i = 0; i < kNumMultiThreadDecoders; ++i) + multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 0); + DoTest(); +} + +class AV1DecodeMultiThreadedTestLarge : public AV1DecodeMultiThreadedTest {}; + +TEST_P(AV1DecodeMultiThreadedTestLarge, MD5Match) { + cfg_.large_scale_tile = 0; + single_thread_dec_->Control(AV1_SET_TILE_MODE, 0); + for (int i = 0; i < kNumMultiThreadDecoders; ++i) + multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 0); + DoTest(); +} + +// TODO(ranjit): More tests have to be added using pre-generated MD5. 
+AV1_INSTANTIATE_TEST_SUITE(AV1DecodeMultiThreadedTest, ::testing::Values(1, 2), + ::testing::Values(1, 2), ::testing::Values(1), + ::testing::Values(3), ::testing::Values(0, 1)); +AV1_INSTANTIATE_TEST_SUITE(AV1DecodeMultiThreadedTestLarge, + ::testing::Values(0, 1, 2, 6), + ::testing::Values(0, 1, 2, 6), + ::testing::Values(1, 4), ::testing::Values(0), + ::testing::Values(0, 1)); + +class AV1DecodeMultiThreadedLSTestLarge + : public AV1DecodeMultiThreadedTestLarge {}; + +TEST_P(AV1DecodeMultiThreadedLSTestLarge, MD5Match) { + cfg_.large_scale_tile = 1; + single_thread_dec_->Control(AV1_SET_TILE_MODE, 1); + for (int i = 0; i < kNumMultiThreadDecoders; ++i) + multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 1); + DoTest(); +} + +AV1_INSTANTIATE_TEST_SUITE(AV1DecodeMultiThreadedLSTestLarge, + ::testing::Values(6), ::testing::Values(6), + ::testing::Values(1), ::testing::Values(0, 3), + ::testing::Values(0, 1)); + +} // namespace diff --git a/third_party/aom/test/decode_perf_test.cc b/third_party/aom/test/decode_perf_test.cc new file mode 100644 index 0000000000..030035466c --- /dev/null +++ b/third_party/aom/test/decode_perf_test.cc @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <string> +#include <tuple> + +#include "config/aom_version.h" + +#include "aom_ports/aom_timer.h" +#include "common/ivfenc.h" +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/ivf_video_source.h" +#include "test/md5_helper.h" +#include "test/util.h" +#include "test/webm_video_source.h" + +using std::make_tuple; + +namespace { + +#define VIDEO_NAME 0 +#define THREADS 1 + +const double kUsecsInSec = 1000000.0; +const char kNewEncodeOutputFile[] = "new_encode.ivf"; + +/* + DecodePerfTest takes a tuple of filename + number of threads to decode with + */ +typedef std::tuple<const char *, unsigned> DecodePerfParam; + +// TODO(jimbankoski): Add actual test vectors here when available. +// const DecodePerfParam kAV1DecodePerfVectors[] = {}; + +/* + In order to reflect real world performance as much as possible, Perf tests + *DO NOT* do any correctness checks. Please run them alongside correctness + tests to ensure proper codec integrity. Furthermore, in this test we + deliberately limit the amount of system calls we make to avoid OS + preemption. 
+ + TODO(joshualitt) create a more detailed perf measurement test to collect + power/temp/min max frame decode times/etc + */ + +class DecodePerfTest : public ::testing::TestWithParam<DecodePerfParam> {}; + +TEST_P(DecodePerfTest, PerfTest) { + const char *const video_name = GET_PARAM(VIDEO_NAME); + const unsigned threads = GET_PARAM(THREADS); + + libaom_test::WebMVideoSource video(video_name); + video.Init(); + + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.threads = threads; + cfg.allow_lowbitdepth = 1; + libaom_test::AV1Decoder decoder(cfg, 0); + + aom_usec_timer t; + aom_usec_timer_start(&t); + + for (video.Begin(); video.cxdata() != nullptr; video.Next()) { + decoder.DecodeFrame(video.cxdata(), video.frame_size()); + } + + aom_usec_timer_mark(&t); + const double elapsed_secs = double(aom_usec_timer_elapsed(&t)) / kUsecsInSec; + const unsigned frames = video.frame_number(); + const double fps = double(frames) / elapsed_secs; + + printf("{\n"); + printf("\t\"type\" : \"decode_perf_test\",\n"); + printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP); + printf("\t\"videoName\" : \"%s\",\n", video_name); + printf("\t\"threadCount\" : %u,\n", threads); + printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs); + printf("\t\"totalFrames\" : %u,\n", frames); + printf("\t\"framesPerSecond\" : %f\n", fps); + printf("}\n"); +} + +// TODO(jimbankoski): Enabled when we have actual AV1 Decode vectors. 
// Encoder fixture that writes every compressed frame packet to an IVF file
// named kNewEncodeOutputFile under $LIBAOM_TEST_DATA_PATH (or "." when the
// variable is unset), so that the test body can decode that file afterwards.
class AV1NewEncodeDecodePerfTest
    : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
      public ::libaom_test::EncoderTest {
 protected:
  AV1NewEncodeDecodePerfTest()
      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), speed_(0),
        outfile_(nullptr), out_frames_(0) {}

  ~AV1NewEncodeDecodePerfTest() override = default;

  // Initializes the encoder configuration for the parameterized mode and then
  // overrides the rate-control settings used by this test (VBR, quantizer
  // range 2..56, 25 frames of lag).
  void SetUp() override {
    InitializeConfig(encoding_mode_);

    cfg_.g_lag_in_frames = 25;
    cfg_.rc_min_quantizer = 2;
    cfg_.rc_max_quantizer = 56;
    cfg_.rc_dropframe_thresh = 0;
    cfg_.rc_undershoot_pct = 50;
    cfg_.rc_overshoot_pct = 50;
    cfg_.rc_buf_sz = 1000;
    cfg_.rc_buf_initial_sz = 500;
    cfg_.rc_buf_optimal_sz = 600;
    cfg_.rc_end_usage = AOM_VBR;
  }

  // Applies the encoder controls once, before the first frame (frame() == 0)
  // is encoded; later frames leave the encoder settings untouched.
  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
                          ::libaom_test::Encoder *encoder) override {
    if (video->frame() == 0) {
      encoder->Control(AOME_SET_CPUUSED, speed_);
      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
      encoder->Control(AV1E_SET_TILE_COLUMNS, 2);
    }
  }

  // Opens (and truncates, mode "wb") the output file at the start of a pass.
  // NOTE(review): ASSERT_NE only returns from this hook on failure; the other
  // hooks would then run with outfile_ == nullptr - confirm the base class
  // stops the run after a fatal failure.
  void BeginPassHook(unsigned int /*pass*/) override {
    const char *const env = getenv("LIBAOM_TEST_DATA_PATH");
    const std::string data_path(env ? env : ".");
    const std::string path_to_source = data_path + "/" + kNewEncodeOutputFile;
    outfile_ = fopen(path_to_source.c_str(), "wb");
    ASSERT_NE(outfile_, nullptr);
  }

  // Finalizes the output file: rewinds to the start and rewrites the IVF file
  // header with the final frame count, then closes the file. The header is
  // only rewritten when fseek() succeeds (returns 0).
  void EndPassHook() override {
    if (outfile_ != nullptr) {
      if (!fseek(outfile_, 0, SEEK_SET))
        ivf_write_file_header(outfile_, &cfg_, AV1_FOURCC, out_frames_);
      fclose(outfile_);
      outfile_ = nullptr;
    }
  }

  // Appends one compressed frame to the output file. out_frames_ is
  // incremented first, so the value passed to the IVF writers below is the
  // 1-based count of frames seen so far.
  void FramePktHook(const aom_codec_cx_pkt_t *pkt) override {
    ++out_frames_;

    // Write initial file header if first frame. This header is provisional:
    // EndPassHook() overwrites it with the final frame count.
    if (pkt->data.frame.pts == 0)
      ivf_write_file_header(outfile_, &cfg_, AV1_FOURCC, out_frames_);

    // Write frame header and data.
    ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz);
    ASSERT_EQ(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_),
              pkt->data.frame.sz);
  }

  // Always false. Presumably tells EncoderTest not to decode while encoding,
  // since the test body decodes the written file itself - TODO confirm
  // against EncoderTest.
  bool DoDecode() const override { return false; }

  // Sets the value sent with AOME_SET_CPUUSED before the first frame.
  void set_speed(unsigned int speed) { speed_ = speed; }

 private:
  libaom_test::TestMode encoding_mode_;
  uint32_t speed_;       // Value for AOME_SET_CPUUSED.
  FILE *outfile_;        // Output IVF file; nullptr while no pass is active.
  uint32_t out_frames_;  // Number of frame packets received so far.
};
decode_video.frame_size()); + } + + aom_usec_timer_mark(&t); + const double elapsed_secs = + static_cast<double>(aom_usec_timer_elapsed(&t)) / kUsecsInSec; + const unsigned decode_frames = decode_video.frame_number(); + const double fps = static_cast<double>(decode_frames) / elapsed_secs; + + printf("{\n"); + printf("\t\"type\" : \"decode_perf_test\",\n"); + printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP); + printf("\t\"videoName\" : \"%s\",\n", kNewEncodeOutputFile); + printf("\t\"threadCount\" : %u,\n", threads); + printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs); + printf("\t\"totalFrames\" : %u,\n", decode_frames); + printf("\t\"framesPerSecond\" : %f\n", fps); + printf("}\n"); +} + +AV1_INSTANTIATE_TEST_SUITE(AV1NewEncodeDecodePerfTest, + ::testing::Values(::libaom_test::kTwoPassGood)); +} // namespace diff --git a/third_party/aom/test/decode_scalability_test.cc b/third_party/aom/test/decode_scalability_test.cc new file mode 100644 index 0000000000..d66c8ec719 --- /dev/null +++ b/third_party/aom/test/decode_scalability_test.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <ostream> + +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/ivf_video_source.h" +#include "test/util.h" +#include "test/video_source.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +struct ObuExtensionHeader { + int temporal_id; + int spatial_id; +}; + +struct DecodeParam { + const char *filename; + const ObuExtensionHeader *headers; + size_t num_headers; +}; + +std::ostream &operator<<(std::ostream &os, const DecodeParam &dp) { + return os << "file: " << dp.filename; +} + +class DecodeScalabilityTest + : public ::libaom_test::DecoderTest, + public ::libaom_test::CodecTestWithParam<DecodeParam> { + protected: + DecodeScalabilityTest() + : DecoderTest(GET_PARAM(0)), headers_(GET_PARAM(1).headers), + num_headers_(GET_PARAM(1).num_headers) {} + + ~DecodeScalabilityTest() override = default; + + void PreDecodeFrameHook(const libaom_test::CompressedVideoSource &video, + libaom_test::Decoder *decoder) override { + if (video.frame_number() == 0) + decoder->Control(AV1D_SET_OUTPUT_ALL_LAYERS, 1); + } + + void DecompressedFrameHook(const aom_image_t &img, + const unsigned int /*frame_number*/) override { + const ObuExtensionHeader &header = headers_[header_index_]; + EXPECT_EQ(img.temporal_id, header.temporal_id); + EXPECT_EQ(img.spatial_id, header.spatial_id); + header_index_ = (header_index_ + 1) % num_headers_; + } + + void RunTest() { + const DecodeParam input = GET_PARAM(1); + aom_codec_dec_cfg_t cfg = { 1, 0, 0, !FORCE_HIGHBITDEPTH_DECODING }; + libaom_test::IVFVideoSource decode_video(input.filename); + decode_video.Init(); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&decode_video, cfg)); + } + + private: + const ObuExtensionHeader *const headers_; + const size_t num_headers_; + size_t header_index_ = 0; +}; + +TEST_P(DecodeScalabilityTest, ObuExtensionHeader) { RunTest(); } + +// For all test files, we have: +// operatingPoint = 0 +// OperatingPointIdc = operating_point_idc[ 
0 ] + +// av1-1-b8-01-size-16x16.ivf: +// operating_points_cnt_minus_1 = 0 +// operating_point_idc[ 0 ] = 0x0 +const ObuExtensionHeader kSize16x16Headers[1] = { { 0, 0 } }; + +// av1-1-b8-22-svc-L1T2.ivf: +// operating_points_cnt_minus_1 = 1 +// operating_point_idc[ 0 ] = 0x103 +// operating_point_idc[ 1 ] = 0x101 +const ObuExtensionHeader kL1T2Headers[2] = { { 0, 0 }, { 1, 0 } }; + +// av1-1-b8-22-svc-L2T1.ivf: +// operating_points_cnt_minus_1 = 1 +// operating_point_idc[ 0 ] = 0x301 +// operating_point_idc[ 1 ] = 0x101 +const ObuExtensionHeader kL2T1Headers[2] = { { 0, 0 }, { 0, 1 } }; + +// av1-1-b8-22-svc-L2T2.ivf: +// operating_points_cnt_minus_1 = 3 +// operating_point_idc[ 0 ] = 0x303 +// operating_point_idc[ 1 ] = 0x301 +// operating_point_idc[ 2 ] = 0x103 +// operating_point_idc[ 3 ] = 0x101 +const ObuExtensionHeader kL2T2Headers[4] = { + { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } +}; + +const DecodeParam kAV1DecodeScalabilityTests[] = { + // { filename, headers, num_headers } + { "av1-1-b8-01-size-16x16.ivf", kSize16x16Headers, 1 }, + { "av1-1-b8-22-svc-L1T2.ivf", kL1T2Headers, 2 }, + { "av1-1-b8-22-svc-L2T1.ivf", kL2T1Headers, 2 }, + { "av1-1-b8-22-svc-L2T2.ivf", kL2T2Headers, 4 }, +}; + +AV1_INSTANTIATE_TEST_SUITE(DecodeScalabilityTest, + ::testing::ValuesIn(kAV1DecodeScalabilityTests)); + +} // namespace diff --git a/third_party/aom/test/decode_test_driver.cc b/third_party/aom/test/decode_test_driver.cc new file mode 100644 index 0000000000..f44d670556 --- /dev/null +++ b/third_party/aom/test/decode_test_driver.cc @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/register_state_check.h" +#include "test/video_source.h" + +namespace libaom_test { + +const char kAV1Name[] = "AOMedia Project AV1 Decoder"; + +aom_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size, + aom_codec_stream_info_t *stream_info) { + return aom_codec_peek_stream_info(CodecInterface(), cxdata, size, + stream_info); +} + +aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) { + return DecodeFrame(cxdata, size, nullptr); +} + +aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size, + void *user_priv) { + aom_codec_err_t res_dec; + InitOnce(); + API_REGISTER_STATE_CHECK( + res_dec = aom_codec_decode(&decoder_, cxdata, size, user_priv)); + return res_dec; +} + +bool Decoder::IsAV1() const { + const char *codec_name = GetDecoderName(); + return strncmp(kAV1Name, codec_name, sizeof(kAV1Name) - 1) == 0; +} + +void DecoderTest::HandlePeekResult(Decoder *const /*decoder*/, + CompressedVideoSource * /*video*/, + const aom_codec_err_t res_peek) { + /* The Av1 implementation of PeekStream returns an error only if the + * data passed to it isn't a valid Av1 chunk. */ + ASSERT_EQ(AOM_CODEC_OK, res_peek) + << "Peek return failed: " << aom_codec_err_to_string(res_peek); +} + +void DecoderTest::RunLoop(CompressedVideoSource *video, + const aom_codec_dec_cfg_t &dec_cfg) { + Decoder *const decoder = codec_->CreateDecoder(dec_cfg, flags_); + ASSERT_NE(decoder, nullptr); + bool end_of_file = false; + bool peeked_stream = false; + + // Decode frames. 
+ for (video->Begin(); !::testing::Test::HasFailure() && !end_of_file; + video->Next()) { + PreDecodeFrameHook(*video, decoder); + + aom_codec_stream_info_t stream_info; + stream_info.is_annexb = 0; + + if (video->cxdata() != nullptr) { + if (!peeked_stream) { + // TODO(yaowu): PeekStream returns error for non-sequence_header_obu, + // therefore should only be tried once per sequence, this shall be fixed + // once PeekStream is updated to properly operate on other obus. + const aom_codec_err_t res_peek = decoder->PeekStream( + video->cxdata(), video->frame_size(), &stream_info); + HandlePeekResult(decoder, video, res_peek); + ASSERT_FALSE(::testing::Test::HasFailure()); + peeked_stream = true; + } + + aom_codec_err_t res_dec = + decoder->DecodeFrame(video->cxdata(), video->frame_size()); + if (!HandleDecodeResult(res_dec, *video, decoder)) break; + } else { + // Signal end of the file to the decoder. + const aom_codec_err_t res_dec = decoder->DecodeFrame(nullptr, 0); + ASSERT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + end_of_file = true; + } + + DxDataIterator dec_iter = decoder->GetDxData(); + const aom_image_t *img = nullptr; + + // Get decompressed data + while (!::testing::Test::HasFailure() && (img = dec_iter.Next())) + DecompressedFrameHook(*img, video->frame_number()); + } + delete decoder; +} + +void DecoderTest::RunLoop(CompressedVideoSource *video) { + aom_codec_dec_cfg_t dec_cfg = aom_codec_dec_cfg_t(); + RunLoop(video, dec_cfg); +} + +void DecoderTest::set_cfg(const aom_codec_dec_cfg_t &dec_cfg) { + memcpy(&cfg_, &dec_cfg, sizeof(cfg_)); +} + +void DecoderTest::set_flags(const aom_codec_flags_t flags) { flags_ = flags; } + +} // namespace libaom_test diff --git a/third_party/aom/test/decode_test_driver.h b/third_party/aom/test/decode_test_driver.h new file mode 100644 index 0000000000..311898ecf0 --- /dev/null +++ b/third_party/aom/test/decode_test_driver.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AOM_TEST_DECODE_TEST_DRIVER_H_ +#define AOM_TEST_DECODE_TEST_DRIVER_H_ +#include <cstring> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" + +#include "aom/aom_decoder.h" + +namespace libaom_test { + +class CodecFactory; +class CompressedVideoSource; + +// Provides an object to handle decoding output +class DxDataIterator { + public: + explicit DxDataIterator(aom_codec_ctx_t *decoder) + : decoder_(decoder), iter_(nullptr) {} + + const aom_image_t *Next() { return aom_codec_get_frame(decoder_, &iter_); } + + private: + aom_codec_ctx_t *decoder_; + aom_codec_iter_t iter_; +}; + +// Provides a simplified interface to manage one video decoding. +// Similar to Encoder class, the exact services should be added +// as more tests are added. 
+class Decoder { + public: + explicit Decoder(aom_codec_dec_cfg_t cfg) + : cfg_(cfg), flags_(0), init_done_(false) { + memset(&decoder_, 0, sizeof(decoder_)); + } + + Decoder(aom_codec_dec_cfg_t cfg, const aom_codec_flags_t flag) + : cfg_(cfg), flags_(flag), init_done_(false) { + memset(&decoder_, 0, sizeof(decoder_)); + } + + virtual ~Decoder() { aom_codec_destroy(&decoder_); } + + aom_codec_err_t PeekStream(const uint8_t *cxdata, size_t size, + aom_codec_stream_info_t *stream_info); + + aom_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size); + + aom_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size, + void *user_priv); + + DxDataIterator GetDxData() { return DxDataIterator(&decoder_); } + + void Control(int ctrl_id, int arg) { Control(ctrl_id, arg, AOM_CODEC_OK); } + + void Control(int ctrl_id, const void *arg) { + InitOnce(); + const aom_codec_err_t res = aom_codec_control(&decoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << DecodeError(); + } + + void Control(int ctrl_id, int arg, aom_codec_err_t expected_value) { + InitOnce(); + const aom_codec_err_t res = aom_codec_control(&decoder_, ctrl_id, arg); + ASSERT_EQ(expected_value, res) << DecodeError(); + } + + const char *DecodeError() { + const char *detail = aom_codec_error_detail(&decoder_); + return detail ? detail : aom_codec_error(&decoder_); + } + + // Passes the external frame buffer information to libaom. 
+ aom_codec_err_t SetFrameBufferFunctions( + aom_get_frame_buffer_cb_fn_t cb_get, + aom_release_frame_buffer_cb_fn_t cb_release, void *user_priv) { + InitOnce(); + return aom_codec_set_frame_buffer_functions(&decoder_, cb_get, cb_release, + user_priv); + } + + const char *GetDecoderName() const { + return aom_codec_iface_name(CodecInterface()); + } + + bool IsAV1() const; + + aom_codec_ctx_t *GetDecoder() { return &decoder_; } + + protected: + virtual aom_codec_iface_t *CodecInterface() const = 0; + + void InitOnce() { + if (!init_done_) { + const aom_codec_err_t res = + aom_codec_dec_init(&decoder_, CodecInterface(), &cfg_, flags_); + ASSERT_EQ(AOM_CODEC_OK, res) << DecodeError(); + init_done_ = true; + } + } + + aom_codec_ctx_t decoder_; + aom_codec_dec_cfg_t cfg_; + aom_codec_flags_t flags_; + bool init_done_; +}; + +// Common test functionality for all Decoder tests. +class DecoderTest { + public: + // Main decoding loop + virtual void RunLoop(CompressedVideoSource *video); + virtual void RunLoop(CompressedVideoSource *video, + const aom_codec_dec_cfg_t &dec_cfg); + + virtual void set_cfg(const aom_codec_dec_cfg_t &dec_cfg); + virtual void set_flags(const aom_codec_flags_t flags); + + // Hook to be called before decompressing every frame. + virtual void PreDecodeFrameHook(const CompressedVideoSource & /*video*/, + Decoder * /*decoder*/) {} + + // Hook to be called to handle decode result. Return true to continue. + virtual bool HandleDecodeResult(const aom_codec_err_t res_dec, + const CompressedVideoSource & /*video*/, + Decoder *decoder) { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + return AOM_CODEC_OK == res_dec; + } + + // Hook to be called on every decompressed frame. 
# Runs decode_to_md5 on $1 and captures the md5 sum for the final frame. $2 is
# interpreted as codec name and used solely to name the output file. $3 is the
# expected md5 sum: It must match that of the final frame.
# Returns 0 when the sums match, 1 on any failure (tool missing, tool error,
# no output file, or mismatch).
decode_to_md5() {
  local decoder="$(aom_tool_path decode_to_md5)"
  local input_file="$1"
  local codec="$2"
  local expected_md5="$3"
  local output_file="${AOM_TEST_OUTPUT_DIR}/decode_to_md5_${codec}"

  if [ ! -x "${decoder}" ]; then
    elog "${decoder} does not exist or is not executable."
    return 1
  fi

  eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
      ${devnull} || return 1

  [ -e "${output_file}" ] || return 1

  # The first whitespace-separated field of the last output line is the md5 of
  # the final frame. One awk pass is enough; the field was previously piped
  # through a second, redundant `echo | awk` stage.
  local actual_md5="$(tail -n1 "${output_file}" | awk '{print $1}')"
  if [ "${actual_md5}" = "${expected_md5}" ]; then
    return 0
  fi

  elog "MD5 mismatch:"
  elog "Expected: ${expected_md5}"
  elog "Actual: ${actual_md5}"
  return 1
}
# Runs decode_with_drops on $1, $2 is interpreted as codec name and used solely
# to name the output file. $3 is the drop mode, and is passed directly to
# decode_with_drops.
# Returns 1 when the tool is missing or not executable, when the tool exits
# with a non-zero status, or when no output file was produced.
decode_with_drops() {
  local decoder="$(aom_tool_path decode_with_drops)"
  local input_file="$1"
  local codec="$2"
  local output_file="${AOM_TEST_OUTPUT_DIR}/decode_with_drops_${codec}"
  local drop_mode="$3"

  if [ ! -x "${decoder}" ]; then
    elog "${decoder} does not exist or is not executable."
    return 1
  fi

  # ${devnull} is intentionally unquoted so that it can expand to several
  # words; presumably tools_common.sh sets it to an output redirection -
  # TODO confirm.
  eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
      "${drop_mode}" ${devnull} || return 1

  # The status of this check is the function's return value on success.
  [ -e "${output_file}" ] || return 1
}
/*
  Reproduces https://crbug.com/aomedia/3376. Emulates the command line:

  ./aomenc --cpu-used=6 --threads=10 --cq-level=14 --passes=1 --limit=1 \
      --lag-in-frames=0 --end-usage=q --deltaq-mode=3 --min-q=0 --max-q=63 \
      -o output.av1 niklas_1280_720_30.y4m

  The test encodes a single 1280x720 frame with 10 threads and deltaq-mode 3,
  and expects exactly one output packet, which must be a key frame.
*/
TEST(DeltaqModeTest, DeltaqMode3MultiThread) {
  constexpr int kWidth = 1280;
  constexpr int kHeight = 720;
  // Dummy buffer of neutral gray samples.
  // Size: one full-resolution plane plus half of that again for the two
  // chroma planes of the I420 image wrapped below.
  constexpr size_t kBufferSize = kWidth * kHeight + kWidth * kHeight / 2;
  std::vector<unsigned char> buffer(kBufferSize,
                                    static_cast<unsigned char>(128));

  // Wrap the buffer without copying; |buffer| must outlive |img|.
  aom_image_t img;
  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1,
                               buffer.data()));

  // Start from the good-quality defaults and override the fields that
  // correspond to the command line above.
  aom_codec_iface_t *iface = aom_codec_av1_cx();
  aom_codec_enc_cfg_t cfg;
  EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY),
            AOM_CODEC_OK);
  cfg.g_w = kWidth;
  cfg.g_h = kHeight;
  cfg.g_threads = 10;
  cfg.rc_end_usage = AOM_Q;
  cfg.g_profile = 0;
  cfg.g_bit_depth = AOM_BITS_8;
  cfg.g_input_bit_depth = 8;
  cfg.g_lag_in_frames = 0;
  cfg.rc_min_quantizer = 0;
  cfg.rc_max_quantizer = 63;
  cfg.g_pass = AOM_RC_ONE_PASS;
  cfg.g_limit = 1;
  aom_codec_ctx_t enc;
  EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK);
  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 6), AOM_CODEC_OK);
  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 14), AOM_CODEC_OK);
  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_DELTAQ_MODE, 3), AOM_CODEC_OK);
  EXPECT_EQ(aom_codec_set_option(&enc, "passes", "1"), AOM_CODEC_OK);
  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_STUDIO_RANGE),
            AOM_CODEC_OK);

  // Encode the single frame; exactly one packet is expected.
  EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK);
  aom_codec_iter_t iter = nullptr;
  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
  // Fatal: |pkt| is dereferenced below. Note that a failure here returns
  // before aom_codec_destroy() is reached.
  ASSERT_NE(pkt, nullptr);
  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
  // pkt->data.frame.flags is 0x1f0011.
  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
  pkt = aom_codec_get_cx_data(&enc, &iter);
  EXPECT_EQ(pkt, nullptr);

  // Flush encoder
  // A null image signals the end of input; no further packets are expected.
  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0));
  iter = nullptr;
  pkt = aom_codec_get_cx_data(&enc, &iter);
  EXPECT_EQ(pkt, nullptr);

  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
}
+ EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Flush encoder + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +// The implementation of multi-threading for deltaq-mode=3 in allintra +// mode is based on row multi-threading. +// The test ensures that When row mt is turned off, +// deltaq-mode = 3 can still properly encode and decode. +TEST(DeltaqModeTest, DeltaqMode3MultiThreadNoRowMT) { + constexpr int kWidth = 1280; + constexpr int kHeight = 720; + // Dummy buffer of neutral gray samples. + constexpr size_t kBufferSize = kWidth * kHeight + kWidth * kHeight / 2; + std::vector<unsigned char> buffer(kBufferSize, + static_cast<unsigned char>(128)); + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1, + buffer.data())); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY), + AOM_CODEC_OK); + cfg.g_w = kWidth; + cfg.g_h = kHeight; + cfg.g_threads = 10; + cfg.rc_end_usage = AOM_Q; + cfg.g_profile = 0; + cfg.g_bit_depth = AOM_BITS_8; + cfg.g_input_bit_depth = 8; + cfg.g_lag_in_frames = 0; + cfg.rc_min_quantizer = 0; + cfg.rc_max_quantizer = 63; + cfg.g_pass = AOM_RC_ONE_PASS; + cfg.g_limit = 1; + aom_codec_ctx_t enc; + EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_ROW_MT, 0), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 6), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 14), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_DELTAQ_MODE, 3), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_set_option(&enc, "passes", "1"), AOM_CODEC_OK); + 
#if CONFIG_AV1_HIGHBITDEPTH
// 10-bit version of the DeltaqMode3MultiThread test.
// Encodes a single 1280x720 10-bit frame with 10 threads and deltaq-mode 3,
// and expects exactly one output packet, which must be a key frame.
TEST(DeltaqModeTest, DeltaqMode3MultiThreadHighbd) {
  constexpr int kWidth = 1280;
  constexpr int kHeight = 720;
  // Dummy buffer of 10-bit neutral gray samples.
  // kBufferSize counts samples, not bytes: each sample is stored in a
  // uint16_t, and 512 is the mid-point of the 10-bit range.
  constexpr size_t kBufferSize = kWidth * kHeight + kWidth * kHeight / 2;
  std::vector<uint16_t> buffer(kBufferSize, 512);

  // Wrap the buffer without copying; aom_img_wrap() takes a byte pointer, so
  // the 16-bit sample buffer is reinterpreted here.
  aom_image_t img;
  EXPECT_EQ(&img,
            aom_img_wrap(&img, AOM_IMG_FMT_I42016, kWidth, kHeight, 1,
                         reinterpret_cast<unsigned char *>(buffer.data())));

  // Start from the good-quality defaults, then select 10-bit coding.
  aom_codec_iface_t *iface = aom_codec_av1_cx();
  aom_codec_enc_cfg_t cfg;
  EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY),
            AOM_CODEC_OK);
  cfg.g_w = kWidth;
  cfg.g_h = kHeight;
  cfg.g_threads = 10;
  cfg.rc_end_usage = AOM_Q;
  cfg.g_profile = 0;
  cfg.g_bit_depth = AOM_BITS_10;
  cfg.g_input_bit_depth = 10;
  cfg.g_lag_in_frames = 0;
  cfg.rc_min_quantizer = 0;
  cfg.rc_max_quantizer = 63;
  cfg.g_pass = AOM_RC_ONE_PASS;
  cfg.g_limit = 1;
  aom_codec_ctx_t enc;
  // Unlike the 8-bit tests, the encoder is initialized with the
  // high-bit-depth flag.
  EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH),
            AOM_CODEC_OK);
  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 6), AOM_CODEC_OK);
  EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 14), AOM_CODEC_OK);
  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_DELTAQ_MODE, 3), AOM_CODEC_OK);
  EXPECT_EQ(aom_codec_set_option(&enc, "passes", "1"), AOM_CODEC_OK);
  EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_STUDIO_RANGE),
            AOM_CODEC_OK);

  // Encode the single frame; exactly one packet is expected.
  EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK);
  aom_codec_iter_t iter = nullptr;
  const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter);
  // Fatal: |pkt| is dereferenced below. Note that a failure here returns
  // before aom_codec_destroy() is reached.
  ASSERT_NE(pkt, nullptr);
  EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT);
  // pkt->data.frame.flags is 0x1f0011.
  EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY);
  pkt = aom_codec_get_cx_data(&enc, &iter);
  EXPECT_EQ(pkt, nullptr);

  // Flush encoder
  // A null image signals the end of input; no further packets are expected.
  EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0));
  iter = nullptr;
  pkt = aom_codec_get_cx_data(&enc, &iter);
  EXPECT_EQ(pkt, nullptr);

  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
}
#endif  // CONFIG_AV1_HIGHBITDEPTH
+ */ + +#include "aom_dsp/flow_estimation/disflow.h" + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "test/yuv_video_source.h" + +namespace { + +using ComputeFlowAtPointFunc = void (*)(const uint8_t *src, const uint8_t *ref, + int x, int y, int width, int height, + int stride, double *u, double *v); + +class ComputeFlowTest + : public ::testing::TestWithParam<ComputeFlowAtPointFunc> { + public: + ComputeFlowTest() + : target_func_(GetParam()), + rnd_(libaom_test::ACMRandom::DeterministicSeed()) {} + + protected: + void RunCheckOutput(int run_times); + ComputeFlowAtPointFunc target_func_; + + libaom_test::ACMRandom rnd_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ComputeFlowTest); + +void ComputeFlowTest::RunCheckOutput(int run_times) { + constexpr int kWidth = 352; + constexpr int kHeight = 288; + + ::libaom_test::YUVVideoSource video("bus_352x288_420_f20_b8.yuv", + AOM_IMG_FMT_I420, kWidth, kHeight, 30, 1, + 0, 2); + // Use Y (Luminance) plane. + video.Begin(); + uint8_t *src = video.img()->planes[0]; + ASSERT_NE(src, nullptr); + video.Next(); + uint8_t *ref = video.img()->planes[0]; + ASSERT_NE(ref, nullptr); + + // Pick a random value between -5 and 5. The range was chosen arbitrarily as + // u and v can take any kind of value in practise, but it shouldn't change the + // outcome of the tests. + const double u_rand = (static_cast<double>(rnd_.Rand8()) / 255) * 10 - 5; + double u_ref = u_rand; + double u_test = u_rand; + + const double v_rand = (static_cast<double>(rnd_.Rand8()) / 255) * 10 - 5; + double v_ref = v_rand; + double v_test = v_rand; + + // Pick a random point in the frame. If the frame is 352x288, that means we + // can call the function on all values of x comprised between 8 and 344, and + // all values of y comprised between 8 and 280. 
+ const int x = rnd_((kWidth - 8) - 8 + 1) + 8; + const int y = rnd_((kHeight - 8) - 8 + 1) + 8; + + aom_usec_timer ref_timer, test_timer; + + aom_compute_flow_at_point_c(src, ref, x, y, kWidth, kHeight, kWidth, &u_ref, + &v_ref); + + target_func_(src, ref, x, y, kWidth, kHeight, kWidth, &u_test, &v_test); + + if (run_times > 1) { + aom_usec_timer_start(&ref_timer); + for (int i = 0; i < run_times; ++i) { + aom_compute_flow_at_point_c(src, ref, x, y, kWidth, kHeight, kWidth, + &u_ref, &v_ref); + } + aom_usec_timer_mark(&ref_timer); + const double elapsed_time_c = + static_cast<double>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int i = 0; i < run_times; ++i) { + target_func_(src, ref, x, y, kWidth, kHeight, kWidth, &u_test, &v_test); + } + aom_usec_timer_mark(&test_timer); + const double elapsed_time_simd = + static_cast<double>(aom_usec_timer_elapsed(&test_timer)); + + printf("c_time=%fns \t simd_time=%fns \t speedup=%.2f\n", elapsed_time_c, + elapsed_time_simd, (elapsed_time_c / elapsed_time_simd)); + } else { + ASSERT_EQ(u_ref, u_test); + ASSERT_EQ(v_ref, v_test); + } +} + +TEST_P(ComputeFlowTest, CheckOutput) { RunCheckOutput(1); } + +TEST_P(ComputeFlowTest, DISABLED_Speed) { RunCheckOutput(10000000); } + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P(SSE4_1, ComputeFlowTest, + ::testing::Values(aom_compute_flow_at_point_sse4_1)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, ComputeFlowTest, + ::testing::Values(aom_compute_flow_at_point_neon)); +#endif + +} // namespace diff --git a/third_party/aom/test/divu_small_test.cc b/third_party/aom/test/divu_small_test.cc new file mode 100644 index 0000000000..496fbc1f8e --- /dev/null +++ b/third_party/aom/test/divu_small_test.cc @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <stdlib.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "aom_dsp/odintrin.h" + +using libaom_test::ACMRandom; + +TEST(DivuSmallTest, TestDIVUuptoMAX) { + for (int d = 1; d <= OD_DIVU_DMAX; d++) { + for (uint32_t x = 1; x <= 1000000; x++) { + GTEST_ASSERT_EQ(x / d, OD_DIVU_SMALL(x, d)) + << "x=" << x << " d=" << d << " x/d=" << (x / d) + << " != " << OD_DIVU_SMALL(x, d); + } + } +} + +TEST(DivuSmallTest, TestDIVUrandI31) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int d = 1; d < OD_DIVU_DMAX; d++) { + for (int i = 0; i < 1000000; i++) { + uint32_t x = rnd.Rand31(); + GTEST_ASSERT_EQ(x / d, OD_DIVU_SMALL(x, d)) + << "x=" << x << " d=" << d << " x/d=" << (x / d) + << " != " << OD_DIVU_SMALL(x, d); + } + } +} diff --git a/third_party/aom/test/dr_prediction_test.cc b/third_party/aom/test/dr_prediction_test.cc new file mode 100644 index 0000000000..3865810e9b --- /dev/null +++ b/third_party/aom/test/dr_prediction_test.cc @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom_mem/aom_mem.h" +#include "aom_ports/aom_timer.h" +#include "av1/common/blockd.h" +#include "av1/common/pred_common.h" +#include "av1/common/reconintra.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +namespace { + +const int kZ1Start = 0; +const int kZ2Start = 90; +const int kZ3Start = 180; + +const TX_SIZE kTxSize[] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_64X64, + TX_4X8, TX_8X4, TX_8X16, TX_16X8, TX_16X32, + TX_32X16, TX_32X64, TX_64X32, TX_4X16, TX_16X4, + TX_8X32, TX_32X8, TX_16X64, TX_64X16 }; + +const char *const kTxSizeStrings[] = { + "TX_4X4", "TX_8X8", "TX_16X16", "TX_32X32", "TX_64X64", + "TX_4X8", "TX_8X4", "TX_8X16", "TX_16X8", "TX_16X32", + "TX_32X16", "TX_32X64", "TX_64X32", "TX_4X16", "TX_16X4", + "TX_8X32", "TX_32X8", "TX_16X64", "TX_64X16" +}; + +using libaom_test::ACMRandom; + +typedef void (*DrPred_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh, + const uint16_t *above, const uint16_t *left, + int upsample_above, int upsample_left, int dx, + int dy, int bd); + +typedef void (*DrPred)(uint8_t *dst, ptrdiff_t stride, int bw, int bh, + const uint8_t *above, const uint8_t *left, + int upsample_above, int upsample_left, int dx, int dy, + int bd); + +typedef void (*Z1_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh, + const uint8_t *above, const uint8_t *left, + int upsample_above, int dx, int dy); +template <Z1_Lbd fn> +void z1_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh, + const uint8_t *above, const uint8_t *left, int upsample_above, + int upsample_left, int dx, int dy, int bd) { + (void)bd; + (void)upsample_left; + fn(dst, stride, bw, bh, above, left, upsample_above, dx, dy); +} + +typedef void (*Z2_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh, + const uint8_t *above, const uint8_t 
*left, + int upsample_above, int upsample_left, int dx, int dy); +template <Z2_Lbd fn> +void z2_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh, + const uint8_t *above, const uint8_t *left, int upsample_above, + int upsample_left, int dx, int dy, int bd) { + (void)bd; + (void)upsample_left; + fn(dst, stride, bw, bh, above, left, upsample_above, upsample_left, dx, dy); +} + +typedef void (*Z3_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh, + const uint8_t *above, const uint8_t *left, + int upsample_left, int dx, int dy); +template <Z3_Lbd fn> +void z3_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh, + const uint8_t *above, const uint8_t *left, int upsample_above, + int upsample_left, int dx, int dy, int bd) { + (void)bd; + (void)upsample_above; + fn(dst, stride, bw, bh, above, left, upsample_left, dx, dy); +} + +typedef void (*Z1_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh, + const uint16_t *above, const uint16_t *left, + int upsample_above, int dx, int dy, int bd); +template <Z1_Hbd fn> +void z1_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh, + const uint16_t *above, const uint16_t *left, + int upsample_above, int upsample_left, int dx, int dy, + int bd) { + (void)bd; + (void)upsample_left; + fn(dst, stride, bw, bh, above, left, upsample_above, dx, dy, bd); +} + +typedef void (*Z2_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh, + const uint16_t *above, const uint16_t *left, + int upsample_above, int upsample_left, int dx, int dy, + int bd); +template <Z2_Hbd fn> +void z2_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh, + const uint16_t *above, const uint16_t *left, + int upsample_above, int upsample_left, int dx, int dy, + int bd) { + (void)bd; + fn(dst, stride, bw, bh, above, left, upsample_above, upsample_left, dx, dy, + bd); +} + +typedef void (*Z3_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh, + const uint16_t *above, const uint16_t *left, + int upsample_left, int dx, int dy, int bd); +template 
<Z3_Hbd fn> +void z3_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh, + const uint16_t *above, const uint16_t *left, + int upsample_above, int upsample_left, int dx, int dy, + int bd) { + (void)bd; + (void)upsample_above; + fn(dst, stride, bw, bh, above, left, upsample_left, dx, dy, bd); +} + +template <typename FuncType> +struct DrPredFunc { + DrPredFunc(FuncType pred = nullptr, FuncType tst = nullptr, + int bit_depth_value = 0, int start_angle_value = 0) + : ref_fn(pred), tst_fn(tst), bit_depth(bit_depth_value), + start_angle(start_angle_value) {} + + FuncType ref_fn; + FuncType tst_fn; + int bit_depth; + int start_angle; +}; + +template <typename Pixel, typename FuncType> +class DrPredTest : public ::testing::TestWithParam<DrPredFunc<FuncType> > { + protected: + static const int kMaxNumTests = 10000; + static const int kIterations = 10; + static const int kDstStride = 64; + static const int kDstSize = kDstStride * kDstStride; + static const int kOffset = 16; + static const int kBufSize = ((2 * MAX_TX_SIZE) << 1) + 16; + + DrPredTest() + : enable_upsample_(0), upsample_above_(0), upsample_left_(0), bw_(0), + bh_(0), dx_(1), dy_(1), bd_(8), txsize_(TX_4X4) { + params_ = this->GetParam(); + start_angle_ = params_.start_angle; + stop_angle_ = start_angle_ + 90; + + dst_ref_ = &dst_ref_data_[0]; + dst_tst_ = &dst_tst_data_[0]; + dst_stride_ = kDstStride; + above_ = &above_data_[kOffset]; + left_ = &left_data_[kOffset]; + + for (int i = 0; i < kBufSize; ++i) { + above_data_[i] = rng_.Rand8(); + left_data_[i] = rng_.Rand8(); + } + + for (int i = 0; i < kDstSize; ++i) { + dst_ref_[i] = 0; + dst_tst_[i] = 0; + } + } + + ~DrPredTest() override = default; + + void Predict(bool speedtest, int tx) { + const int kNumTests = speedtest ? 
kMaxNumTests : 1; + aom_usec_timer timer; + int tst_time = 0; + + bd_ = params_.bit_depth; + + aom_usec_timer_start(&timer); + for (int k = 0; k < kNumTests; ++k) { + params_.ref_fn(dst_ref_, dst_stride_, bw_, bh_, above_, left_, + upsample_above_, upsample_left_, dx_, dy_, bd_); + } + aom_usec_timer_mark(&timer); + const int ref_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + + if (params_.tst_fn) { + aom_usec_timer_start(&timer); + for (int k = 0; k < kNumTests; ++k) { + API_REGISTER_STATE_CHECK(params_.tst_fn(dst_tst_, dst_stride_, bw_, bh_, + above_, left_, upsample_above_, + upsample_left_, dx_, dy_, bd_)); + } + aom_usec_timer_mark(&timer); + tst_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + } else { + for (int i = 0; i < kDstSize; ++i) { + dst_ref_[i] = dst_tst_[i]; + } + } + + OutputTimes(kNumTests, ref_time, tst_time, tx); + } + + void RunTest(bool speedtest, bool needsaturation, int p_angle) { + bd_ = params_.bit_depth; + + if (needsaturation) { + for (int i = 0; i < kBufSize; ++i) { + above_data_[i] = left_data_[i] = (1 << bd_) - 1; + } + } + for (int tx = 0; tx < TX_SIZES_ALL; ++tx) { + if (params_.tst_fn == nullptr) { + for (int i = 0; i < kDstSize; ++i) { + dst_tst_[i] = (1 << bd_) - 1; + dst_ref_[i] = (1 << bd_) - 1; + } + } else { + for (int i = 0; i < kDstSize; ++i) { + dst_ref_[i] = 0; + dst_tst_[i] = 0; + } + } + + bw_ = tx_size_wide[kTxSize[tx]]; + bh_ = tx_size_high[kTxSize[tx]]; + + if (enable_upsample_) { + upsample_above_ = + av1_use_intra_edge_upsample(bw_, bh_, p_angle - 90, 0); + upsample_left_ = + av1_use_intra_edge_upsample(bw_, bh_, p_angle - 180, 0); + } else { + upsample_above_ = upsample_left_ = 0; + } + + Predict(speedtest, tx); + + for (int r = 0; r < bh_; ++r) { + for (int c = 0; c < bw_; ++c) { + ASSERT_EQ(dst_ref_[r * dst_stride_ + c], + dst_tst_[r * dst_stride_ + c]) + << bw_ << "x" << bh_ << " r: " << r << " c: " << c + << " dx: " << dx_ << " dy: " << dy_ + << " upsample_above: " << upsample_above_ + 
<< " upsample_left: " << upsample_left_; + } + } + } + } + + void OutputTimes(int num_tests, int ref_time, int tst_time, int tx) { + if (num_tests > 1) { + if (params_.tst_fn) { + const float x = static_cast<float>(ref_time) / tst_time; + printf("\t[%8s] :: ref time %6d, tst time %6d %3.2f\n", + kTxSizeStrings[tx], ref_time, tst_time, x); + } else { + printf("\t[%8s] :: ref time %6d\n", kTxSizeStrings[tx], ref_time); + } + } + } + + void RundrPredTest(const int speed) { + if (params_.tst_fn == nullptr) return; + const int angles[] = { 3, 45, 87 }; + const int start_angle = speed ? 0 : start_angle_; + const int stop_angle = speed ? 3 : stop_angle_; + for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) { + for (int i = start_angle; i < stop_angle; ++i) { + const int angle = speed ? angles[i] + start_angle_ : i; + dx_ = av1_get_dx(angle); + dy_ = av1_get_dy(angle); + if (speed) { + printf("enable_upsample: %d angle: %d ~~~~~~~~~~~~~~~\n", + enable_upsample_, angle); + } + if (dx_ && dy_) RunTest(speed, false, angle); + } + } + } + + Pixel dst_ref_data_[kDstSize]; + Pixel dst_tst_data_[kDstSize]; + + Pixel left_data_[kBufSize]; + Pixel dummy_data_[kBufSize]; + Pixel above_data_[kBufSize]; + + Pixel *dst_ref_; + Pixel *dst_tst_; + Pixel *above_; + Pixel *left_; + int dst_stride_; + + int enable_upsample_; + int upsample_above_; + int upsample_left_; + int bw_; + int bh_; + int dx_; + int dy_; + int bd_; + TX_SIZE txsize_; + + int start_angle_; + int stop_angle_; + + ACMRandom rng_; + + DrPredFunc<FuncType> params_; +}; + +class LowbdDrPredTest : public DrPredTest<uint8_t, DrPred> {}; + +TEST_P(LowbdDrPredTest, SaturatedValues) { + for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) { + for (int angle = start_angle_; angle < stop_angle_; ++angle) { + dx_ = av1_get_dx(angle); + dy_ = av1_get_dy(angle); + if (dx_ && dy_) RunTest(false, true, angle); + } + } +} + +using std::make_tuple; + +INSTANTIATE_TEST_SUITE_P( + C, LowbdDrPredTest, + 
::testing::Values(DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>, + nullptr, AOM_BITS_8, kZ1Start), + DrPredFunc<DrPred>(&z2_wrapper<av1_dr_prediction_z2_c>, + nullptr, AOM_BITS_8, kZ2Start), + DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>, + nullptr, AOM_BITS_8, kZ3Start))); + +#if CONFIG_AV1_HIGHBITDEPTH +class HighbdDrPredTest : public DrPredTest<uint16_t, DrPred_Hbd> {}; + +TEST_P(HighbdDrPredTest, SaturatedValues) { + for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) { + for (int angle = start_angle_; angle < stop_angle_; ++angle) { + dx_ = av1_get_dx(angle); + dy_ = av1_get_dy(angle); + if (dx_ && dy_) RunTest(false, true, angle); + } + } +} + +INSTANTIATE_TEST_SUITE_P( + C, HighbdDrPredTest, + ::testing::Values( + DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>, + nullptr, AOM_BITS_8, kZ1Start), + DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>, + nullptr, AOM_BITS_10, kZ1Start), + DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>, + nullptr, AOM_BITS_12, kZ1Start), + DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>, + nullptr, AOM_BITS_8, kZ2Start), + DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>, + nullptr, AOM_BITS_10, kZ2Start), + DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>, + nullptr, AOM_BITS_12, kZ2Start), + DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>, + nullptr, AOM_BITS_8, kZ3Start), + DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>, + nullptr, AOM_BITS_10, kZ3Start), + DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>, + nullptr, AOM_BITS_12, kZ3Start))); +#endif // CONFIG_AV1_HIGHBITDEPTH + +TEST_P(LowbdDrPredTest, OperationCheck) { RundrPredTest(0); } + +TEST_P(LowbdDrPredTest, DISABLED_Speed) { RundrPredTest(1); } + +#if CONFIG_AV1_HIGHBITDEPTH +TEST_P(HighbdDrPredTest, OperationCheck) { + if (params_.tst_fn == 
nullptr) return; + for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) { + for (int angle = start_angle_; angle < stop_angle_; angle++) { + dx_ = av1_get_dx(angle); + dy_ = av1_get_dy(angle); + if (dx_ && dy_) RunTest(false, false, angle); + } + } +} + +TEST_P(HighbdDrPredTest, DISABLED_Speed) { + const int angles[] = { 3, 45, 87 }; + for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) { + for (int i = 0; i < 3; ++i) { + int angle = angles[i] + start_angle_; + dx_ = av1_get_dx(angle); + dy_ = av1_get_dy(angle); + printf("enable_upsample: %d angle: %d ~~~~~~~~~~~~~~~\n", + enable_upsample_, angle); + if (dx_ && dy_) RunTest(true, false, angle); + } + } +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, LowbdDrPredTest, + ::testing::Values( + DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>, + &z1_wrapper<av1_dr_prediction_z1_sse4_1>, AOM_BITS_8, + kZ1Start), + DrPredFunc<DrPred>(&z2_wrapper<av1_dr_prediction_z2_c>, + &z2_wrapper<av1_dr_prediction_z2_sse4_1>, AOM_BITS_8, + kZ2Start), + DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>, + &z3_wrapper<av1_dr_prediction_z3_sse4_1>, AOM_BITS_8, + kZ3Start))); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, LowbdDrPredTest, + ::testing::Values(DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>, + &z1_wrapper<av1_dr_prediction_z1_avx2>, + AOM_BITS_8, kZ1Start), + DrPredFunc<DrPred>(&z2_wrapper<av1_dr_prediction_z2_c>, + &z2_wrapper<av1_dr_prediction_z2_avx2>, + AOM_BITS_8, kZ2Start), + DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>, + &z3_wrapper<av1_dr_prediction_z3_avx2>, + AOM_BITS_8, kZ3Start))); + +#if CONFIG_AV1_HIGHBITDEPTH +INSTANTIATE_TEST_SUITE_P( + AVX2, HighbdDrPredTest, + ::testing::Values(DrPredFunc<DrPred_Hbd>( + &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>, + &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_avx2>, + AOM_BITS_8, kZ1Start), + DrPredFunc<DrPred_Hbd>( + 
&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>, + &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_avx2>, + AOM_BITS_10, kZ1Start), + DrPredFunc<DrPred_Hbd>( + &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>, + &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_avx2>, + AOM_BITS_12, kZ1Start), + DrPredFunc<DrPred_Hbd>( + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>, + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_avx2>, + AOM_BITS_8, kZ2Start), + DrPredFunc<DrPred_Hbd>( + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>, + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_avx2>, + AOM_BITS_10, kZ2Start), + DrPredFunc<DrPred_Hbd>( + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>, + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_avx2>, + AOM_BITS_12, kZ2Start), + DrPredFunc<DrPred_Hbd>( + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>, + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_avx2>, + AOM_BITS_8, kZ3Start), + DrPredFunc<DrPred_Hbd>( + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>, + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_avx2>, + AOM_BITS_10, kZ3Start), + DrPredFunc<DrPred_Hbd>( + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>, + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_avx2>, + AOM_BITS_12, kZ3Start))); +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_AVX2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, LowbdDrPredTest, + ::testing::Values(DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>, + &z1_wrapper<av1_dr_prediction_z1_neon>, + AOM_BITS_8, kZ1Start), + DrPredFunc<DrPred>(&z2_wrapper<av1_dr_prediction_z2_c>, + &z2_wrapper<av1_dr_prediction_z2_neon>, + AOM_BITS_8, kZ2Start), + DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>, + &z3_wrapper<av1_dr_prediction_z3_neon>, + AOM_BITS_8, kZ3Start))); + +#if CONFIG_AV1_HIGHBITDEPTH +INSTANTIATE_TEST_SUITE_P( + NEON, HighbdDrPredTest, + ::testing::Values(DrPredFunc<DrPred_Hbd>( + &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>, + &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_neon>, + AOM_BITS_8, kZ1Start), + 
DrPredFunc<DrPred_Hbd>( + &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>, + &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_neon>, + AOM_BITS_10, kZ1Start), + DrPredFunc<DrPred_Hbd>( + &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>, + &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_neon>, + AOM_BITS_12, kZ1Start), + DrPredFunc<DrPred_Hbd>( + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>, + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_neon>, + AOM_BITS_8, kZ2Start), + DrPredFunc<DrPred_Hbd>( + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>, + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_neon>, + AOM_BITS_10, kZ2Start), + DrPredFunc<DrPred_Hbd>( + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>, + &z2_wrapper_hbd<av1_highbd_dr_prediction_z2_neon>, + AOM_BITS_12, kZ2Start), + DrPredFunc<DrPred_Hbd>( + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>, + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_neon>, + AOM_BITS_8, kZ3Start), + DrPredFunc<DrPred_Hbd>( + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>, + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_neon>, + AOM_BITS_10, kZ3Start), + DrPredFunc<DrPred_Hbd>( + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>, + &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_neon>, + AOM_BITS_12, kZ3Start))); +#endif // CONFIG_AV1_HIGHBITDEPTH + +#endif // HAVE_NEON + +} // namespace diff --git a/third_party/aom/test/dropframe_encode_test.cc b/third_party/aom/test/dropframe_encode_test.cc new file mode 100644 index 0000000000..4a54c0b95c --- /dev/null +++ b/third_party/aom/test/dropframe_encode_test.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2023, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +// Params: test mode, threads. +class DropFrameEncodeTestLarge + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, + unsigned int>, + public ::libaom_test::EncoderTest { + protected: + DropFrameEncodeTestLarge() + : EncoderTest(GET_PARAM(0)), frame_number_(0), threads_(GET_PARAM(2)) {} + + void SetUp() override { InitializeConfig(GET_PARAM(1)); } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + frame_number_ = video->frame(); + if (frame_number_ == 0) { + encoder->Control(AOME_SET_CPUUSED, 1); + } + } + + unsigned int frame_number_; + unsigned int threads_; +}; + +// Test to reproduce the assertion failure related to buf->display_idx in +// init_gop_frames_for_tpl() and segmentation fault reported in aomedia:3372 +// while encoding with --drop-frame=1. +TEST_P(DropFrameEncodeTestLarge, TestNoMisMatch) { + cfg_.rc_end_usage = AOM_CBR; + cfg_.rc_buf_sz = 1; + cfg_.g_pass = AOM_RC_ONE_PASS; + cfg_.rc_dropframe_thresh = 1; + cfg_.g_threads = threads_; + + ::libaom_test::I420VideoSource video("desktopqvga2.320_240.yuv", 320, 240, 30, + 1, 0, 100); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +AV1_INSTANTIATE_TEST_SUITE(DropFrameEncodeTestLarge, + ::testing::Values(::libaom_test::kOnePassGood), + ::testing::Values(1, 4)); + +} // namespace diff --git a/third_party/aom/test/dump_obu.sh b/third_party/aom/test/dump_obu.sh new file mode 100755 index 0000000000..933db64a6a --- /dev/null +++ b/third_party/aom/test/dump_obu.sh @@ -0,0 +1,77 @@ +#!/bin/sh +## Copyright (c) 2018, Alliance for Open Media. 
. $(dirname $0)/tools_common.sh

readonly dump_obu_test_file="${AOM_TEST_OUTPUT_DIR}/av1_obu_test.ivf"

# Environment check run before the tests: the raw YUV input must exist and,
# when both AV1 encode and decode are available, the dump_obu tool must be
# locatable. Returns 1 on either failure so the tests are not attempted.
dump_obu_verify_environment() {
  if [ ! -e "${YUV_RAW_INPUT}" ]; then
    elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBAOM_TEST_DATA_PATH."
    return 1
  fi
  if [ "$(dump_obu_available)" = "yes" ]; then
    if [ -z "$(aom_tool_path dump_obu)" ]; then
      elog "dump_obu not found in LIBAOM_BIN_PATH, its parent, or child tools/."
      # Previously the error was only logged and the check still succeeded.
      return 1
    fi
  fi
}

# Echoes yes when both AV1 decode and AV1 encode are available.
dump_obu_available() {
  if [ "$(av1_decode_available)" = "yes" ] && \
     [ "$(av1_encode_available)" = "yes" ]; then
    echo yes
  fi
}

# Echoes yes when the aomenc tool exists and is executable.
aomenc_available() {
  if [ -x "$(aom_tool_path aomenc)" ]; then
    echo yes
  fi
}

# Encodes the raw YUV input to ${dump_obu_test_file} (IVF) with aomenc,
# using the realtime parameter set on realtime-only builds. Does nothing
# when aomenc is unavailable. Returns 1 if encoding fails or no output
# file is produced.
encode_test_file() {
  if [ "$(aomenc_available)" = "yes" ]; then
    local encoder="$(aom_tool_path aomenc)"
    if [ "$(realtime_only_build)" = "yes" ]; then
      eval "${encoder}" \
        $(aomenc_encode_test_rt_params) \
        $(yuv_raw_input) \
        --ivf \
        --output=${dump_obu_test_file} \
        ${devnull} || return 1
    else
      eval "${encoder}" \
        $(aomenc_encode_test_fast_params) \
        $(yuv_raw_input) \
        --ivf \
        --output=${dump_obu_test_file} \
        ${devnull} || return 1
    fi
    if [ ! -e "${dump_obu_test_file}" ]; then
      elog "dump_obu test input encode failed."
      return 1
    fi
  fi
}

# The test itself: encode the input, then run dump_obu on the result.
dump_obu() {
  encode_test_file || return 1
  eval $(aom_tool_path dump_obu) "${dump_obu_test_file}" ${devnull}
}

dump_obu_tests="dump_obu"

run_tests dump_obu_verify_environment "${dump_obu_tests}"
(std::nothrow) unsigned[sz + 1]); + ASSERT_NE(tell, nullptr); + std::unique_ptr<unsigned[]> enc_method(new (std::nothrow) unsigned[sz]); + ASSERT_NE(enc_method, nullptr); + od_ec_enc_reset(&enc); + tell[0] = od_ec_enc_tell_frac(&enc); + for (j = 0; j < sz; j++) { + data[j] = rand() / ((RAND_MAX >> 1) + 1); + + fts[j] = CDF_PROB_BITS; + fz[j] = (rand() % (CDF_PROB_TOP - 2)) >> (CDF_PROB_BITS - fts[j]); + fz[j] = OD_MAXI(fz[j], 1); + enc_method[j] = 3 + (rand() & 1); + switch (enc_method[j]) { + case 3: { + od_ec_encode_bool_q15(&enc, data[j], + OD_ICDF(fz[j] << (CDF_PROB_BITS - fts[j]))); + break; + } + case 4: { + uint16_t cdf[2]; + cdf[0] = OD_ICDF(fz[j]); + cdf[1] = OD_ICDF(1U << fts[j]); + od_ec_encode_cdf_q15(&enc, data[j], cdf, 2); + break; + } + } + + tell[j + 1] = od_ec_enc_tell_frac(&enc); + } + ptr = od_ec_enc_done(&enc, &ptr_sz); + ASSERT_NE(ptr, nullptr); + EXPECT_GE(((od_ec_enc_tell(&enc) + 7U) >> 3), ptr_sz) + << "od_ec_enc_tell() lied: " + "there's " + << ptr_sz << " bytes instead of " << ((od_ec_enc_tell(&enc) + 7) >> 3) + << " (Random seed: " << seed << ")\n"; + od_ec_dec_init(&dec, ptr, ptr_sz); + EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[0]) + << "od_ec_dec_tell() mismatch between encoder and decoder " + "at symbol 0: " + << (unsigned)od_ec_dec_tell_frac(&dec) << " instead of " << tell[0] + << " (Random seed: " << seed << ").\n"; + for (j = 0; j < sz; j++) { + int dec_method; + unsigned int sym = data[j] + 1; // Initialize sym to an invalid value. 
+ + dec_method = 3 + (rand() & 1); + + switch (dec_method) { + case 3: { + sym = od_ec_decode_bool_q15( + &dec, OD_ICDF(fz[j] << (CDF_PROB_BITS - fts[j]))); + break; + } + case 4: { + uint16_t cdf[2]; + cdf[0] = OD_ICDF(fz[j]); + cdf[1] = OD_ICDF(1U << fts[j]); + sym = od_ec_decode_cdf_q15(&dec, cdf, 2); + break; + } + } + + EXPECT_EQ(sym, data[j]) + << "Decoded " << sym << " instead of " << data[j] + << " with fz=" << fz[j] << " and ftb=" << fts[j] << "at position " + << j << " of " << sz << " (Random seed: " << seed << ").\n" + << "Encoding method: " << enc_method[j] + << " decoding method: " << dec_method << "\n"; + EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[j + 1]) + << "od_ec_dec_tell() mismatch between encoder and " + "decoder at symbol " + << j + 1 << ": " << (unsigned)od_ec_dec_tell_frac(&dec) + << " instead of " << tell[j + 1] << " (Random seed: " << seed + << ").\n"; + } + } + od_ec_enc_reset(&enc); + od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384)); + od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384)); + od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384)); + od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384)); + od_ec_encode_bool_q15(&enc, 0, OD_ICDF(24576)); + od_ec_enc_patch_initial_bits(&enc, 3, 2); + EXPECT_FALSE(enc.error) << "od_ec_enc_patch_initial_bits() failed.\n"; + od_ec_enc_patch_initial_bits(&enc, 0, 5); + EXPECT_TRUE(enc.error) + << "od_ec_enc_patch_initial_bits() didn't fail when it should have.\n"; + od_ec_enc_reset(&enc); + od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384)); + od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384)); + od_ec_encode_bool_q15(&enc, 1, OD_ICDF(32256)); + od_ec_encode_bool_q15(&enc, 0, OD_ICDF(24576)); + od_ec_enc_patch_initial_bits(&enc, 0, 2); + EXPECT_FALSE(enc.error) << "od_ec_enc_patch_initial_bits() failed.\n"; + ptr = od_ec_enc_done(&enc, &ptr_sz); + ASSERT_NE(ptr, nullptr); + EXPECT_EQ(ptr_sz, 2u); + EXPECT_EQ(ptr[0], 63) + << "Got " << ptr[0] + << " when expecting 63 for od_ec_enc_patch_initial_bits().\n"; + 
od_ec_enc_clear(&enc); + EXPECT_EQ(ret, 0); +} diff --git a/third_party/aom/test/encode_api_test.cc b/third_party/aom/test/encode_api_test.cc new file mode 100644 index 0000000000..aa4084f9e4 --- /dev/null +++ b/third_party/aom/test/encode_api_test.cc @@ -0,0 +1,659 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <cassert> +#include <cstdlib> +#include <cstring> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" + +#include "aom/aomcx.h" +#include "aom/aom_encoder.h" +#include "aom/aom_image.h" + +namespace { + +#if CONFIG_REALTIME_ONLY +const unsigned int kUsage = AOM_USAGE_REALTIME; +#else +const unsigned int kUsage = AOM_USAGE_GOOD_QUALITY; +#endif + +static void *Memset16(void *dest, int val, size_t length) { + uint16_t *dest16 = (uint16_t *)dest; + for (size_t i = 0; i < length; ++i) *dest16++ = val; + return dest; +} + +TEST(EncodeAPI, InvalidParams) { + uint8_t buf[1] = { 0 }; + aom_image_t img; + aom_codec_ctx_t enc; + aom_codec_enc_cfg_t cfg; + + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, 1, 1, 1, buf)); + + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_init(nullptr, nullptr, nullptr, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_init(&enc, nullptr, nullptr, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_encode(nullptr, nullptr, 0, 0, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(nullptr, &img, 0, 0, 0)); + 
EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(nullptr)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_config_default(nullptr, nullptr, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_config_default(nullptr, &cfg, 0)); + EXPECT_NE(aom_codec_error(nullptr), nullptr); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + SCOPED_TRACE(aom_codec_iface_name(iface)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_init(nullptr, iface, nullptr, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_init(&enc, iface, nullptr, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + aom_codec_enc_config_default(iface, &cfg, 3)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage)); + cfg.g_w = 1 << 16; + cfg.g_h = (1 << 14) + 1; + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage)); + cfg.g_w = (1 << 14) + 1; + cfg.g_h = 1 << 16; + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage)); + cfg.g_forced_max_frame_width = 1 << 16; + cfg.g_forced_max_frame_height = (1 << 14) + 1; + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage)); + cfg.g_forced_max_frame_width = (1 << 14) + 1; + cfg.g_forced_max_frame_height = 1 << 16; + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(nullptr, aom_codec_get_global_headers(nullptr)); + + aom_fixed_buf_t *glob_headers = aom_codec_get_global_headers(&enc); + EXPECT_NE(glob_headers->buf, nullptr); + if (glob_headers) { + free(glob_headers->buf); + free(glob_headers); + } + EXPECT_EQ(AOM_CODEC_OK, 
aom_codec_encode(&enc, nullptr, 0, 0, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +TEST(EncodeAPI, InvalidControlId) { + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_ctx_t enc; + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_ERROR, aom_codec_control(&enc, -1, 0)); + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_control(&enc, 0, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +void EncodeSetSFrameOnFirstFrame(aom_img_fmt fmt, aom_codec_flags_t flag) { + constexpr int kWidth = 2; + constexpr int kHeight = 128; + unsigned char kBuffer[kWidth * kHeight * 3] = { 0 }; + aom_image_t img; + ASSERT_EQ(aom_img_wrap(&img, fmt, kWidth, kHeight, 1, kBuffer), &img); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, kUsage), AOM_CODEC_OK); + cfg.g_w = kWidth; + cfg.g_h = kHeight; + + aom_codec_ctx_t enc; + ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, flag), AOM_CODEC_OK); + // One of these aom_codec_encode() calls should fail. 
+ if (aom_codec_encode(&enc, &img, 0, 1, AOM_EFLAG_SET_S_FRAME) == + AOM_CODEC_OK) { + EXPECT_NE(aom_codec_encode(&enc, nullptr, 0, 0, 0), AOM_CODEC_OK); + } + EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} + +TEST(EncodeAPI, SetSFrameOnFirstFrame) { + EncodeSetSFrameOnFirstFrame(AOM_IMG_FMT_I420, 0); +} + +#if CONFIG_AV1_HIGHBITDEPTH +TEST(EncodeAPI, SetSFrameOnFirstFrameHighbd) { + EncodeSetSFrameOnFirstFrame(AOM_IMG_FMT_I42016, AOM_CODEC_USE_HIGHBITDEPTH); +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +TEST(EncodeAPI, MonochromeInProfiles) { + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage)); + cfg.g_w = 128; + cfg.g_h = 128; + cfg.monochrome = 1; + aom_codec_ctx_t enc; + + // Test Profile 0 + cfg.g_profile = 0; + ASSERT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); + + // Test Profile 1 + cfg.g_profile = 1; + ASSERT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0)); + + // Test Profile 3 + cfg.g_profile = 2; + ASSERT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +TEST(EncodeAPI, LowBDEncoderLowBDImage) { + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, kUsage), AOM_CODEC_OK); + + aom_codec_ctx_t enc; + ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK); + + aom_image_t *image = + aom_img_alloc(nullptr, AOM_IMG_FMT_I420, cfg.g_w, cfg.g_h, 0); + ASSERT_NE(image, nullptr); + + // Set the image to two colors so that av1_set_screen_content_options() will + // call av1_get_perpixel_variance(). 
+ int luma_value = 0; + for (unsigned int i = 0; i < image->d_h; ++i) { + memset(image->planes[0] + i * image->stride[0], luma_value, image->d_w); + luma_value = 255 - luma_value; + } + unsigned int uv_h = (image->d_h + 1) / 2; + unsigned int uv_w = (image->d_w + 1) / 2; + for (unsigned int i = 0; i < uv_h; ++i) { + memset(image->planes[1] + i * image->stride[1], 128, uv_w); + memset(image->planes[2] + i * image->stride[2], 128, uv_w); + } + + ASSERT_EQ(aom_codec_encode(&enc, image, 0, 1, 0), AOM_CODEC_OK); + + aom_img_free(image); + ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} + +TEST(EncodeAPI, HighBDEncoderHighBDImage) { + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, kUsage), AOM_CODEC_OK); + + aom_codec_ctx_t enc; + aom_codec_err_t init_status = + aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH); +#if !CONFIG_AV1_HIGHBITDEPTH + ASSERT_EQ(init_status, AOM_CODEC_INCAPABLE); +#else + ASSERT_EQ(init_status, AOM_CODEC_OK); + + aom_image_t *image = + aom_img_alloc(nullptr, AOM_IMG_FMT_I42016, cfg.g_w, cfg.g_h, 0); + ASSERT_NE(image, nullptr); + + // Set the image to two colors so that av1_set_screen_content_options() will + // call av1_get_perpixel_variance(). 
+ int luma_value = 0; + for (unsigned int i = 0; i < image->d_h; ++i) { + Memset16(image->planes[0] + i * image->stride[0], luma_value, image->d_w); + luma_value = 255 - luma_value; + } + unsigned int uv_h = (image->d_h + 1) / 2; + unsigned int uv_w = (image->d_w + 1) / 2; + for (unsigned int i = 0; i < uv_h; ++i) { + Memset16(image->planes[1] + i * image->stride[1], 128, uv_w); + Memset16(image->planes[2] + i * image->stride[2], 128, uv_w); + } + + ASSERT_EQ(aom_codec_encode(&enc, image, 0, 1, 0), AOM_CODEC_OK); + + aom_img_free(image); + ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +#endif +} + +TEST(EncodeAPI, HighBDEncoderLowBDImage) { + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, kUsage), AOM_CODEC_OK); + + aom_codec_ctx_t enc; + aom_codec_err_t init_status = + aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH); +#if !CONFIG_AV1_HIGHBITDEPTH + ASSERT_EQ(init_status, AOM_CODEC_INCAPABLE); +#else + ASSERT_EQ(init_status, AOM_CODEC_OK); + + aom_image_t *image = + aom_img_alloc(nullptr, AOM_IMG_FMT_I420, cfg.g_w, cfg.g_h, 0); + ASSERT_NE(image, nullptr); + + // Set the image to two colors so that av1_set_screen_content_options() will + // call av1_get_perpixel_variance(). 
+ int luma_value = 0; + for (unsigned int i = 0; i < image->d_h; ++i) { + memset(image->planes[0] + i * image->stride[0], luma_value, image->d_w); + luma_value = 255 - luma_value; + } + unsigned int uv_h = (image->d_h + 1) / 2; + unsigned int uv_w = (image->d_w + 1) / 2; + for (unsigned int i = 0; i < uv_h; ++i) { + memset(image->planes[1] + i * image->stride[1], 128, uv_w); + memset(image->planes[2] + i * image->stride[2], 128, uv_w); + } + + ASSERT_EQ(aom_codec_encode(&enc, image, 0, 1, 0), AOM_CODEC_INVALID_PARAM); + + aom_img_free(image); + ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +#endif +} + +TEST(EncodeAPI, LowBDEncoderHighBDImage) { + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, kUsage), AOM_CODEC_OK); + + aom_codec_ctx_t enc; + ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK); + + aom_image_t *image = + aom_img_alloc(nullptr, AOM_IMG_FMT_I42016, cfg.g_w, cfg.g_h, 0); + ASSERT_NE(image, nullptr); + + // Set the image to two colors so that av1_set_screen_content_options() will + // call av1_get_perpixel_variance(). 
+ int luma_value = 0; + for (unsigned int i = 0; i < image->d_h; ++i) { + Memset16(image->planes[0] + i * image->stride[0], luma_value, image->d_w); + luma_value = 255 - luma_value; + } + unsigned int uv_h = (image->d_h + 1) / 2; + unsigned int uv_w = (image->d_w + 1) / 2; + for (unsigned int i = 0; i < uv_h; ++i) { + Memset16(image->planes[1] + i * image->stride[1], 128, uv_w); + Memset16(image->planes[2] + i * image->stride[2], 128, uv_w); + } + + ASSERT_EQ(aom_codec_encode(&enc, image, 0, 1, 0), AOM_CODEC_INVALID_PARAM); + + aom_img_free(image); + ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} + +aom_image_t *CreateGrayImage(aom_img_fmt_t fmt, unsigned int w, + unsigned int h) { + aom_image_t *const image = aom_img_alloc(nullptr, fmt, w, h, 1); + if (!image) return image; + + for (unsigned int i = 0; i < image->d_h; ++i) { + memset(image->planes[0] + i * image->stride[0], 128, image->d_w); + } + const unsigned int uv_h = (image->d_h + 1) / 2; + const unsigned int uv_w = (image->d_w + 1) / 2; + for (unsigned int i = 0; i < uv_h; ++i) { + memset(image->planes[1] + i * image->stride[1], 128, uv_w); + memset(image->planes[2] + i * image->stride[2], 128, uv_w); + } + return image; +} + +TEST(EncodeAPI, Buganizer310548198) { + aom_codec_iface_t *const iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + const unsigned int usage = AOM_USAGE_REALTIME; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage), AOM_CODEC_OK); + cfg.g_w = 1; + cfg.g_h = 444; + cfg.g_pass = AOM_RC_ONE_PASS; + cfg.g_lag_in_frames = 0; + + aom_codec_ctx_t enc; + ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK); + + const int speed = 6; + ASSERT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, speed), AOM_CODEC_OK); + + const aom_enc_frame_flags_t flags = 0; + int frame_index = 0; + + // Encode a frame. 
+ aom_image_t *image = CreateGrayImage(AOM_IMG_FMT_I420, cfg.g_w, cfg.g_h); + ASSERT_NE(image, nullptr); + ASSERT_EQ(aom_codec_encode(&enc, image, frame_index, 1, flags), AOM_CODEC_OK); + frame_index++; + const aom_codec_cx_pkt_t *pkt; + aom_codec_iter_t iter = nullptr; + while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + } + aom_img_free(image); + + cfg.g_w = 1; + cfg.g_h = 254; + ASSERT_EQ(aom_codec_enc_config_set(&enc, &cfg), AOM_CODEC_OK) + << aom_codec_error_detail(&enc); + + cfg.g_w = 1; + cfg.g_h = 154; + ASSERT_EQ(aom_codec_enc_config_set(&enc, &cfg), AOM_CODEC_OK) + << aom_codec_error_detail(&enc); + + // Encode a frame. + image = CreateGrayImage(AOM_IMG_FMT_I420, cfg.g_w, cfg.g_h); + ASSERT_EQ(aom_codec_encode(&enc, image, frame_index, 1, flags), AOM_CODEC_OK); + frame_index++; + iter = nullptr; + while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + } + aom_img_free(image); + + // Flush the encoder. + bool got_data; + do { + ASSERT_EQ(aom_codec_encode(&enc, nullptr, 0, 0, 0), AOM_CODEC_OK); + got_data = false; + iter = nullptr; + while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + got_data = true; + } + } while (got_data); + + ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} + +// Emulates the WebCodecs VideoEncoder interface. +class AV1Encoder { + public: + explicit AV1Encoder(int speed) : speed_(speed) {} + ~AV1Encoder(); + + void Configure(unsigned int threads, unsigned int width, unsigned int height, + aom_rc_mode end_usage, unsigned int usage); + void Encode(bool key_frame); + + private: + // Flushes the encoder. Should be called after all the Encode() calls. 
+ void Flush(); + + const int speed_; + bool initialized_ = false; + aom_codec_enc_cfg_t cfg_; + aom_codec_ctx_t enc_; + int frame_index_ = 0; +}; + +AV1Encoder::~AV1Encoder() { + if (initialized_) { + Flush(); + EXPECT_EQ(aom_codec_destroy(&enc_), AOM_CODEC_OK); + } +} + +void AV1Encoder::Configure(unsigned int threads, unsigned int width, + unsigned int height, aom_rc_mode end_usage, + unsigned int usage) { + if (!initialized_) { + aom_codec_iface_t *const iface = aom_codec_av1_cx(); + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg_, usage), AOM_CODEC_OK); + cfg_.g_threads = threads; + cfg_.g_w = width; + cfg_.g_h = height; + cfg_.g_forced_max_frame_width = cfg_.g_w; + cfg_.g_forced_max_frame_height = cfg_.g_h; + cfg_.g_timebase.num = 1; + cfg_.g_timebase.den = 1000 * 1000; // microseconds + cfg_.g_pass = AOM_RC_ONE_PASS; + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = end_usage; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 58; + ASSERT_EQ(aom_codec_enc_init(&enc_, iface, &cfg_, 0), AOM_CODEC_OK); + ASSERT_EQ(aom_codec_control(&enc_, AOME_SET_CPUUSED, speed_), AOM_CODEC_OK); + initialized_ = true; + return; + } + + ASSERT_EQ(usage, cfg_.g_usage); + cfg_.g_threads = threads; + cfg_.g_w = width; + cfg_.g_h = height; + cfg_.rc_end_usage = end_usage; + ASSERT_EQ(aom_codec_enc_config_set(&enc_, &cfg_), AOM_CODEC_OK) + << aom_codec_error_detail(&enc_); +} + +void AV1Encoder::Encode(bool key_frame) { + assert(initialized_); + // TODO(wtc): Support high bit depths and other YUV formats. + aom_image_t *const image = + CreateGrayImage(AOM_IMG_FMT_I420, cfg_.g_w, cfg_.g_h); + ASSERT_NE(image, nullptr); + const aom_enc_frame_flags_t flags = key_frame ? 
AOM_EFLAG_FORCE_KF : 0; + ASSERT_EQ(aom_codec_encode(&enc_, image, frame_index_, 1, flags), + AOM_CODEC_OK); + frame_index_++; + const aom_codec_cx_pkt_t *pkt; + aom_codec_iter_t iter = nullptr; + while ((pkt = aom_codec_get_cx_data(&enc_, &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + if (key_frame) { + ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + } + } + aom_img_free(image); +} + +void AV1Encoder::Flush() { + bool got_data; + do { + ASSERT_EQ(aom_codec_encode(&enc_, nullptr, 0, 0, 0), AOM_CODEC_OK); + got_data = false; + const aom_codec_cx_pkt_t *pkt; + aom_codec_iter_t iter = nullptr; + while ((pkt = aom_codec_get_cx_data(&enc_, &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + got_data = true; + } + } while (got_data); +} + +TEST(EncodeAPI, Buganizer314858909) { + AV1Encoder encoder(7); + + encoder.Configure(6, 1582, 750, AOM_CBR, AOM_USAGE_REALTIME); + + // Encode a frame. + encoder.Encode(false); + + encoder.Configure(0, 1582, 23, AOM_CBR, AOM_USAGE_REALTIME); + + // Encode a frame.. + encoder.Encode(false); + + encoder.Configure(16, 1542, 363, AOM_CBR, AOM_USAGE_REALTIME); + + // Encode a frame.. + encoder.Encode(false); +} + +// Run this test to reproduce the bug in fuzz test: ASSERT: cpi->rec_sse != +// UINT64_MAX in av1_rc_bits_per_mb. +TEST(EncodeAPI, Buganizer310766628) { + AV1Encoder encoder(7); + + encoder.Configure(16, 759, 383, AOM_CBR, AOM_USAGE_REALTIME); + + // Encode a frame. + encoder.Encode(false); + + encoder.Configure(2, 759, 383, AOM_VBR, AOM_USAGE_REALTIME); + + // Encode a frame. This will trigger the assertion failure. + encoder.Encode(false); +} + +// This test covers a possible use case where the change of frame sizes and +// thread numbers happens before and after the first frame coding. 
+TEST(EncodeAPI, Buganizer310455204) { + AV1Encoder encoder(7); + + encoder.Configure(0, 1915, 503, AOM_VBR, AOM_USAGE_REALTIME); + + encoder.Configure(4, 1, 1, AOM_VBR, AOM_USAGE_REALTIME); + + encoder.Configure(6, 559, 503, AOM_CBR, AOM_USAGE_REALTIME); + + // Encode a frame. + encoder.Encode(false); + + // Increase the number of threads. + encoder.Configure(16, 1915, 503, AOM_CBR, AOM_USAGE_REALTIME); + + // Encode a frame. + encoder.Encode(false); +} + +// Run this test to reproduce the bug in fuzz test: Float-cast-overflow in +// av1_rc_bits_per_mb. +TEST(EncodeAPI, Buganizer310457427) { + AV1Encoder encoder(7); + + encoder.Configure(12, 896, 1076, AOM_CBR, AOM_USAGE_REALTIME); + + encoder.Configure(6, 609, 1076, AOM_VBR, AOM_USAGE_REALTIME); + + // Encode a frame. + encoder.Encode(false); + + // Encode a frame. This will trigger the float-cast-overflow bug which was + // caused by division by zero. + encoder.Encode(false); +} + +class EncodeAPIParameterized + : public testing::TestWithParam<std::tuple< + /*usage=*/unsigned int, /*speed=*/int, /*aq_mode=*/unsigned int>> {}; + +// Encodes two frames at a given usage, speed, and aq_mode setting. 
+// Reproduces b/303023614 +TEST_P(EncodeAPIParameterized, HighBDEncoderHighBDFrames) { + const unsigned int usage = std::get<0>(GetParam()); + int speed = std::get<1>(GetParam()); + + if (speed == 10 && usage != AOM_USAGE_REALTIME) { + speed = 9; // 10 is only allowed in AOM_USAGE_REALTIME + } + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage), AOM_CODEC_OK); + cfg.g_w = 500; + cfg.g_h = 400; + + aom_codec_ctx_t enc; + aom_codec_err_t init_status = + aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH); +#if !CONFIG_AV1_HIGHBITDEPTH + ASSERT_EQ(init_status, AOM_CODEC_INCAPABLE); +#else + ASSERT_EQ(init_status, AOM_CODEC_OK); + + const unsigned int aq_mode = std::get<2>(GetParam()); + + ASSERT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, speed), AOM_CODEC_OK); + ASSERT_EQ(aom_codec_control(&enc, AV1E_SET_AQ_MODE, aq_mode), AOM_CODEC_OK); + + aom_image_t *image = + aom_img_alloc(nullptr, AOM_IMG_FMT_I42016, cfg.g_w, cfg.g_h, 0); + ASSERT_NE(image, nullptr); + + for (unsigned int i = 0; i < image->d_h; ++i) { + Memset16(image->planes[0] + i * image->stride[0], 128, image->d_w); + } + unsigned int uv_h = (image->d_h + 1) / 2; + unsigned int uv_w = (image->d_w + 1) / 2; + for (unsigned int i = 0; i < uv_h; ++i) { + Memset16(image->planes[1] + i * image->stride[1], 128, uv_w); + Memset16(image->planes[2] + i * image->stride[2], 128, uv_w); + } + + // Encode two frames. 
+ ASSERT_EQ( + aom_codec_encode(&enc, image, /*pts=*/0, /*duration=*/1, /*flags=*/0), + AOM_CODEC_OK); + ASSERT_EQ( + aom_codec_encode(&enc, image, /*pts=*/1, /*duration=*/1, /*flags=*/0), + AOM_CODEC_OK); + + aom_img_free(image); + ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +#endif +} + +const unsigned int kUsages[] = { + AOM_USAGE_REALTIME, +#if !CONFIG_REALTIME_ONLY + AOM_USAGE_GOOD_QUALITY, + AOM_USAGE_ALL_INTRA, +#endif +}; + +INSTANTIATE_TEST_SUITE_P(All, EncodeAPIParameterized, + testing::Combine( + /*usage=*/testing::ValuesIn(kUsages), + /*speed=*/testing::Values(6, 7, 10), + /*aq_mode=*/testing::Values(0, 1, 2, 3))); + +#if !CONFIG_REALTIME_ONLY +TEST(EncodeAPI, AllIntraMode) { + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_ctx_t enc; + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); + + // Set g_lag_in_frames to a nonzero value. This should cause + // aom_codec_enc_init() to fail. + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA)); + cfg.g_lag_in_frames = 1; + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0)); + + // Set kf_max_dist to a nonzero value. This should cause aom_codec_enc_init() + // to fail. + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA)); + cfg.kf_max_dist = 1; + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0)); +} +#endif + +} // namespace diff --git a/third_party/aom/test/encode_perf_test.cc b/third_party/aom/test/encode_perf_test.cc new file mode 100644 index 0000000000..b52cf3392c --- /dev/null +++ b/third_party/aom/test/encode_perf_test.cc @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <string> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_version.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "aom_ports/aom_timer.h" + +namespace { + +const int kMaxPsnr = 100; +const double kUsecsInSec = 1000000.0; + +struct EncodePerfTestVideo { + EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_, + uint32_t bitrate_, int frames_) + : name(name_), width(width_), height(height_), bitrate(bitrate_), + frames(frames_) {} + const char *name; + uint32_t width; + uint32_t height; + uint32_t bitrate; + int frames; +}; + +const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = { + EncodePerfTestVideo("desktop_640_360_30.yuv", 640, 360, 200, 2484), + EncodePerfTestVideo("kirland_640_480_30.yuv", 640, 480, 200, 300), + EncodePerfTestVideo("macmarcomoving_640_480_30.yuv", 640, 480, 200, 987), + EncodePerfTestVideo("macmarcostationary_640_480_30.yuv", 640, 480, 200, 718), + EncodePerfTestVideo("niklas_640_480_30.yuv", 640, 480, 200, 471), + EncodePerfTestVideo("tacomanarrows_640_480_30.yuv", 640, 480, 200, 300), + EncodePerfTestVideo("tacomasmallcameramovement_640_480_30.yuv", 640, 480, 200, + 300), + EncodePerfTestVideo("thaloundeskmtg_640_480_30.yuv", 640, 480, 200, 300), + EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470), +}; + 
+const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 }; +const int kEncodePerfTestThreads[] = { 1, 2, 4 }; + +class AV1EncodePerfTest + : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>, + public ::libaom_test::EncoderTest { + protected: + AV1EncodePerfTest() + : EncoderTest(GET_PARAM(0)), min_psnr_(kMaxPsnr), nframes_(0), + encoding_mode_(GET_PARAM(1)), speed_(0), threads_(1) {} + + ~AV1EncodePerfTest() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + + cfg_.g_lag_in_frames = 0; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_error_resilient = 1; + cfg_.g_threads = threads_; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + const int log2_tile_columns = 3; + encoder->Control(AOME_SET_CPUUSED, speed_); + encoder->Control(AV1E_SET_TILE_COLUMNS, log2_tile_columns); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0); + } + } + + void BeginPassHook(unsigned int /*pass*/) override { + min_psnr_ = kMaxPsnr; + nframes_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + if (pkt->data.psnr.psnr[0] < min_psnr_) { + min_psnr_ = pkt->data.psnr.psnr[0]; + } + } + + // for performance reasons don't decode + bool DoDecode() const override { return false; } + + double min_psnr() const { return min_psnr_; } + + void set_speed(unsigned int speed) { speed_ = speed; } + + void set_threads(unsigned int threads) { threads_ = threads; } + + private: + double min_psnr_; + unsigned int nframes_; + libaom_test::TestMode encoding_mode_; + unsigned speed_; + unsigned int threads_; +}; + +TEST_P(AV1EncodePerfTest, PerfTest) { + for (const 
EncodePerfTestVideo &test_video : kAV1EncodePerfTestVectors) { + for (int speed : kEncodePerfTestSpeeds) { + for (int threads : kEncodePerfTestThreads) { + if (test_video.width < 512 && threads > 1) + continue; + else if (test_video.width < 1024 && threads > 2) + continue; + + set_threads(threads); + SetUp(); + + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = test_video.bitrate; + + init_flags_ = AOM_CODEC_USE_PSNR; + + const unsigned frames = test_video.frames; + const char *video_name = test_video.name; + libaom_test::I420VideoSource video(video_name, test_video.width, + test_video.height, timebase.den, + timebase.num, 0, test_video.frames); + set_speed(speed); + + aom_usec_timer t; + aom_usec_timer_start(&t); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + aom_usec_timer_mark(&t); + const double elapsed_secs = aom_usec_timer_elapsed(&t) / kUsecsInSec; + const double fps = frames / elapsed_secs; + const double minimum_psnr = min_psnr(); + std::string display_name(video_name); + if (threads > 1) { + char thread_count[32]; + snprintf(thread_count, sizeof(thread_count), "_t-%d", threads); + display_name += thread_count; + } + + printf("{\n"); + printf("\t\"type\" : \"encode_perf_test\",\n"); + printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP); + printf("\t\"videoName\" : \"%s\",\n", display_name.c_str()); + printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs); + printf("\t\"totalFrames\" : %u,\n", frames); + printf("\t\"framesPerSecond\" : %f,\n", fps); + printf("\t\"minPsnr\" : %f,\n", minimum_psnr); + printf("\t\"speed\" : %d,\n", speed); + printf("\t\"threads\" : %d\n", threads); + printf("}\n"); + } + } + } +} + +AV1_INSTANTIATE_TEST_SUITE(AV1EncodePerfTest, + ::testing::Values(::libaom_test::kRealTime)); +} // namespace diff --git a/third_party/aom/test/encode_small_width_height_test.cc b/third_party/aom/test/encode_small_width_height_test.cc new file mode 100644 index 
0000000000..22f69396d9 --- /dev/null +++ b/third_party/aom/test/encode_small_width_height_test.cc @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2020, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Tests for https://crbug.com/aomedia/2777. +// +// Encode images with a small width (<= two AV1 superblocks) or a small height +// (<= one AV1 superblock) with multiple threads. aom_codec_encode() should +// not crash. + +#include <memory> + +#include "aom/aomcx.h" +#include "aom/aom_encoder.h" +#include "config/aom_config.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +// Dummy buffer of zero samples. +constexpr unsigned char kBuffer[2 * (256 * 512 + 2 * 128 * 256)] = { 0 }; +#if CONFIG_REALTIME_ONLY +const int kUsage = 1; +#else +const int kUsage = 0; +#endif + +void EncodeSmallWidthMultiThreaded(aom_img_fmt fmt, aom_codec_flags_t flag) { + // The image has only one tile and the tile is two AV1 superblocks wide. + // For speed >= 1, superblock size is 64x64 (see av1_select_sb_size()). 
+ constexpr int kWidth = 128; + constexpr int kHeight = 512; + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, fmt, kWidth, kHeight, 1, + const_cast<unsigned char *>(kBuffer))); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage)); + cfg.g_threads = 2; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, flag)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 5)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +TEST(EncodeSmallWidthHeight, SmallWidthMultiThreaded) { + EncodeSmallWidthMultiThreaded(AOM_IMG_FMT_I420, 0); +} + +#if CONFIG_AV1_HIGHBITDEPTH +TEST(HighbdEncodeSmallWidthHeight, SmallWidthMultiThreaded) { + EncodeSmallWidthMultiThreaded(AOM_IMG_FMT_I42016, AOM_CODEC_USE_HIGHBITDEPTH); +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +#if !CONFIG_REALTIME_ONLY +void EncodeSmallWidthMultiThreadedSpeed0(aom_img_fmt fmt, + aom_codec_flags_t flag) { + // The image has only one tile and the tile is two AV1 superblocks wide. + // For speed 0, superblock size is 128x128 (see av1_select_sb_size()). 
+ constexpr int kWidth = 256; + constexpr int kHeight = 512; + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, fmt, kWidth, kHeight, 1, + const_cast<unsigned char *>(kBuffer))); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, 0)); + cfg.g_threads = 2; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, flag)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +TEST(EncodeSmallWidthHeight, SmallWidthMultiThreadedSpeed0) { + EncodeSmallWidthMultiThreadedSpeed0(AOM_IMG_FMT_I420, 0); +} + +#if CONFIG_AV1_HIGHBITDEPTH +TEST(HighbdEncodeSmallWidthHeight, SmallWidthMultiThreadedSpeed0) { + EncodeSmallWidthMultiThreadedSpeed0(AOM_IMG_FMT_I42016, + AOM_CODEC_USE_HIGHBITDEPTH); +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +#endif + +void EncodeSmallHeightMultiThreaded(aom_img_fmt fmt, aom_codec_flags_t flag) { + // The image has only one tile and the tile is one AV1 superblock tall. + // For speed >= 1, superblock size is 64x64 (see av1_select_sb_size()). 
+ constexpr int kWidth = 512; + constexpr int kHeight = 64; + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, fmt, kWidth, kHeight, 1, + const_cast<unsigned char *>(kBuffer))); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage)); + cfg.g_threads = 2; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, flag)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 5)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +TEST(EncodeSmallWidthHeight, SmallHeightMultiThreaded) { + EncodeSmallHeightMultiThreaded(AOM_IMG_FMT_I420, 0); +} + +#if CONFIG_AV1_HIGHBITDEPTH +TEST(HighbdEncodeSmallWidthHeight, SmallHeightMultiThreaded) { + EncodeSmallHeightMultiThreaded(AOM_IMG_FMT_I42016, + AOM_CODEC_USE_HIGHBITDEPTH); +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +#if !CONFIG_REALTIME_ONLY +void EncodeSmallHeightMultiThreadedSpeed0(aom_img_fmt fmt, + aom_codec_flags_t flag) { + // The image has only one tile and the tile is one AV1 superblock tall. + // For speed 0, superblock size is 128x128 (see av1_select_sb_size()). 
+ constexpr int kWidth = 512; + constexpr int kHeight = 128; + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, fmt, kWidth, kHeight, 1, + const_cast<unsigned char *>(kBuffer))); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, 0)); + cfg.g_threads = 2; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, flag)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +TEST(EncodeSmallWidthHeight, SmallHeightMultiThreadedSpeed0) { + EncodeSmallHeightMultiThreadedSpeed0(AOM_IMG_FMT_I420, 0); +} + +#if CONFIG_AV1_HIGHBITDEPTH +TEST(HighbdEncodeSmallWidthHeight, SmallHeightMultiThreadedSpeed0) { + EncodeSmallHeightMultiThreadedSpeed0(AOM_IMG_FMT_I42016, + AOM_CODEC_USE_HIGHBITDEPTH); +} +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif + +// A reproducer test for aomedia:3113. The test should complete without any +// memory errors. +void Encode1x1(aom_img_fmt fmt, int bitdepth, aom_codec_flags_t flags) { + constexpr int kWidth = 1; + constexpr int kHeight = 1; + + // This test cannot use aom_img_alloc() or aom_img_wrap() because they call + // align_image_dimension() to align img.w and img.h to the next even number + // (2). In this test it is important to set img.w and img.h to 1. Therefore we + // set up img manually. 
+ aom_image_t img; + memset(&img, 0, sizeof(img)); + img.fmt = fmt; + img.bit_depth = bitdepth; + img.w = kWidth; + img.h = kHeight; + img.d_w = kWidth; + img.d_h = kHeight; + img.x_chroma_shift = 1; + img.y_chroma_shift = 1; + img.bps = 12; + const int y_stride = kWidth; + const int uv_stride = (kWidth + 1) >> 1; + int y_height = kHeight; + int uv_height = (kHeight + 1) >> 1; + if (bitdepth > 8) { + y_height <<= 1; + uv_height <<= 1; + } + img.stride[AOM_PLANE_Y] = y_stride; + img.stride[AOM_PLANE_U] = img.stride[AOM_PLANE_V] = uv_stride; + std::unique_ptr<unsigned char[]> y_plane( + new unsigned char[y_height * y_stride]()); + ASSERT_NE(y_plane, nullptr); + std::unique_ptr<unsigned char[]> u_plane( + new unsigned char[uv_height * uv_stride]()); + ASSERT_NE(u_plane, nullptr); + std::unique_ptr<unsigned char[]> v_plane( + new unsigned char[uv_height * uv_stride]()); + ASSERT_NE(v_plane, nullptr); + img.planes[AOM_PLANE_Y] = y_plane.get(); + img.planes[AOM_PLANE_U] = u_plane.get(); + img.planes[AOM_PLANE_V] = v_plane.get(); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, kUsage)); + cfg.g_w = kWidth; + cfg.g_h = kHeight; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, flags)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 5)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +TEST(EncodeSmallWidthHeight, 1x1) { Encode1x1(AOM_IMG_FMT_I420, 8, 0); } + +#if CONFIG_AV1_HIGHBITDEPTH +TEST(HighbdEncodeSmallWidthHeight, 1x1) { + Encode1x1(AOM_IMG_FMT_I42016, 12, AOM_CODEC_USE_HIGHBITDEPTH); +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +} // namespace diff --git a/third_party/aom/test/encode_test_driver.cc b/third_party/aom/test/encode_test_driver.cc new file mode 100644 index 
0000000000..b5c506c6d3 --- /dev/null +++ b/third_party/aom/test/encode_test_driver.cc @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <memory> +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" + +#include "aom_ports/mem.h" +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/encode_test_driver.h" +#include "test/register_state_check.h" +#include "test/video_source.h" + +namespace libaom_test { +void Encoder::InitEncoder(VideoSource *video) { + aom_codec_err_t res; + const aom_image_t *img = video->img(); + + if (video->img() && !encoder_.priv) { + cfg_.g_w = img->d_w; + cfg_.g_h = img->d_h; + cfg_.g_timebase = video->timebase(); + cfg_.rc_twopass_stats_in = stats_->buf(); + + res = aom_codec_enc_init(&encoder_, CodecInterface(), &cfg_, init_flags_); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } +} + +void Encoder::EncodeFrame(VideoSource *video, + const aom_enc_frame_flags_t frame_flags) { + if (video->img()) + EncodeFrameInternal(*video, frame_flags); + else + Flush(); + + // Handle twopass stats + CxDataIterator iter = GetCxData(); + + while (const aom_codec_cx_pkt_t *pkt = iter.Next()) { + if (pkt->kind != AOM_CODEC_STATS_PKT) continue; + + stats_->Append(*pkt); + } +} + +void Encoder::EncodeFrameInternal(const VideoSource &video, + const aom_enc_frame_flags_t frame_flags) { + aom_codec_err_t res; + const aom_image_t *img = 
video.img(); + + // Handle frame resizing + if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) { + cfg_.g_w = img->d_w; + cfg_.g_h = img->d_h; + res = aom_codec_enc_config_set(&encoder_, &cfg_); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + // Encode the frame + API_REGISTER_STATE_CHECK(res = + aom_codec_encode(&encoder_, img, video.pts(), + video.duration(), frame_flags)); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); +} + +void Encoder::Flush() { + const aom_codec_err_t res = aom_codec_encode(&encoder_, nullptr, 0, 0, 0); + if (!encoder_.priv) + ASSERT_EQ(AOM_CODEC_ERROR, res) << EncoderError(); + else + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); +} + +void EncoderTest::InitializeConfig(TestMode mode) { + int usage = AOM_USAGE_GOOD_QUALITY; + switch (mode) { + case kOnePassGood: + case kTwoPassGood: break; + case kRealTime: usage = AOM_USAGE_REALTIME; break; + case kAllIntra: usage = AOM_USAGE_ALL_INTRA; break; + default: ASSERT_TRUE(false) << "Unexpected mode " << mode; + } + mode_ = mode; + passes_ = (mode == kTwoPassGood) ? 2 : 1; + + const aom_codec_err_t res = codec_->DefaultEncoderConfig(&cfg_, usage); + ASSERT_EQ(AOM_CODEC_OK, res); +} + +static bool compare_plane(const uint8_t *const buf1, int stride1, + const uint8_t *const buf2, int stride2, int w, int h, + int *const mismatch_row, int *const mismatch_col, + int *const mismatch_pix1, int *const mismatch_pix2) { + int r, c; + + for (r = 0; r < h; ++r) { + for (c = 0; c < w; ++c) { + const int pix1 = buf1[r * stride1 + c]; + const int pix2 = buf2[r * stride2 + c]; + + if (pix1 != pix2) { + if (mismatch_row != nullptr) *mismatch_row = r; + if (mismatch_col != nullptr) *mismatch_col = c; + if (mismatch_pix1 != nullptr) *mismatch_pix1 = pix1; + if (mismatch_pix2 != nullptr) *mismatch_pix2 = pix2; + return false; + } + } + } + + return true; +} + +// The function should return "true" most of the time, therefore no early +// break-out is implemented within the match checking process. 
+static bool compare_img(const aom_image_t *img1, const aom_image_t *img2, + int *const mismatch_row, int *const mismatch_col, + int *const mismatch_plane, int *const mismatch_pix1, + int *const mismatch_pix2) { + if (img1->fmt != img2->fmt || img1->cp != img2->cp || img1->tc != img2->tc || + img1->mc != img2->mc || img1->d_w != img2->d_w || + img1->d_h != img2->d_h || img1->monochrome != img2->monochrome) { + if (mismatch_row != nullptr) *mismatch_row = -1; + if (mismatch_col != nullptr) *mismatch_col = -1; + return false; + } + + const int num_planes = img1->monochrome ? 1 : 3; + for (int plane = 0; plane < num_planes; plane++) { + if (!compare_plane(img1->planes[plane], img1->stride[plane], + img2->planes[plane], img2->stride[plane], + aom_img_plane_width(img1, plane), + aom_img_plane_height(img1, plane), mismatch_row, + mismatch_col, mismatch_pix1, mismatch_pix2)) { + if (mismatch_plane != nullptr) *mismatch_plane = plane; + return false; + } + } + + return true; +} + +void EncoderTest::MismatchHook(const aom_image_t *img_enc, + const aom_image_t *img_dec) { + int mismatch_row = 0; + int mismatch_col = 0; + int mismatch_plane = 0; + int mismatch_pix_enc = 0; + int mismatch_pix_dec = 0; + + ASSERT_FALSE(compare_img(img_enc, img_dec, &mismatch_row, &mismatch_col, + &mismatch_plane, &mismatch_pix_enc, + &mismatch_pix_dec)); + + GTEST_FAIL() << "Encode/Decode mismatch found:" << std::endl + << " pixel value enc/dec: " << mismatch_pix_enc << "/" + << mismatch_pix_dec << std::endl + << " plane: " << mismatch_plane << std::endl + << " row/col: " << mismatch_row << "/" + << mismatch_col << std::endl; +} + +void EncoderTest::RunLoop(VideoSource *video) { + stats_.Reset(); + + ASSERT_TRUE(passes_ == 1 || passes_ == 2); + for (unsigned int pass = 0; pass < passes_; pass++) { + aom_codec_pts_t last_pts = 0; + + if (passes_ == 1) + cfg_.g_pass = AOM_RC_ONE_PASS; + else if (pass == 0) + cfg_.g_pass = AOM_RC_FIRST_PASS; + else + cfg_.g_pass = AOM_RC_LAST_PASS; + + 
BeginPassHook(pass); + std::unique_ptr<Encoder> encoder( + codec_->CreateEncoder(cfg_, init_flags_, &stats_)); + ASSERT_NE(encoder, nullptr); + + ASSERT_NO_FATAL_FAILURE(video->Begin()); + encoder->InitEncoder(video); + + if (mode_ == kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0); + } + + ASSERT_FALSE(::testing::Test::HasFatalFailure()); +#if CONFIG_AV1_DECODER + aom_codec_dec_cfg_t dec_cfg = aom_codec_dec_cfg_t(); + dec_cfg.allow_lowbitdepth = 1; + std::unique_ptr<Decoder> decoder( + codec_->CreateDecoder(dec_cfg, 0 /* flags */)); + if (decoder->IsAV1()) { + // Set dec_cfg.tile_row = -1 and dec_cfg.tile_col = -1 so that the whole + // frame is decoded. + decoder->Control(AV1_SET_TILE_MODE, cfg_.large_scale_tile); + decoder->Control(AV1D_EXT_TILE_DEBUG, 1); + decoder->Control(AV1_SET_DECODE_TILE_ROW, -1); + decoder->Control(AV1_SET_DECODE_TILE_COL, -1); + } +#endif + + int number_spatial_layers = GetNumSpatialLayers(); + + bool again; + for (again = true; again; video->Next()) { + again = (video->img() != nullptr); + + for (int sl = 0; sl < number_spatial_layers; sl++) { + PreEncodeFrameHook(video, encoder.get()); + encoder->EncodeFrame(video, frame_flags_); + PostEncodeFrameHook(encoder.get()); + CxDataIterator iter = encoder->GetCxData(); + bool has_cxdata = false; + +#if CONFIG_AV1_DECODER + bool has_dxdata = false; +#endif + while (const aom_codec_cx_pkt_t *pkt = iter.Next()) { + pkt = MutateEncoderOutputHook(pkt); + again = true; + switch (pkt->kind) { + case AOM_CODEC_CX_FRAME_PKT: // + has_cxdata = true; +#if CONFIG_AV1_DECODER + if (decoder.get() != nullptr && DoDecode()) { + aom_codec_err_t res_dec; + if (DoDecodeInvisible()) { + res_dec = decoder->DecodeFrame( + (const uint8_t *)pkt->data.frame.buf, pkt->data.frame.sz); + } else { + res_dec = decoder->DecodeFrame( + (const uint8_t *)pkt->data.frame.buf + + (pkt->data.frame.sz - pkt->data.frame.vis_frame_size), + pkt->data.frame.vis_frame_size); + } + + if (!HandleDecodeResult(res_dec, 
decoder.get())) break; + + has_dxdata = true; + } +#endif + ASSERT_GE(pkt->data.frame.pts, last_pts); + if (sl == number_spatial_layers - 1) + last_pts = pkt->data.frame.pts; + FramePktHook(pkt); + break; + + case AOM_CODEC_PSNR_PKT: PSNRPktHook(pkt); break; + + case AOM_CODEC_STATS_PKT: StatsPktHook(pkt); break; + + default: break; + } + } + if (has_cxdata) { + const aom_image_t *img_enc = encoder->GetPreviewFrame(); + if (img_enc) { + CalculateFrameLevelSSIM(video->img(), img_enc, cfg_.g_bit_depth, + cfg_.g_input_bit_depth); + } +#if CONFIG_AV1_DECODER + if (has_dxdata) { + DxDataIterator dec_iter = decoder->GetDxData(); + const aom_image_t *img_dec = dec_iter.Next(); + if (img_enc && img_dec) { + const bool res = compare_img(img_enc, img_dec, nullptr, nullptr, + nullptr, nullptr, nullptr); + if (!res) { // Mismatch + MismatchHook(img_enc, img_dec); + } + } + if (img_dec) DecompressedFrameHook(*img_dec, video->pts()); + } +#endif + } + if (!Continue()) break; + } // Loop over spatial layers + } + + EndPassHook(); + + if (!Continue()) break; + } +} + +} // namespace libaom_test diff --git a/third_party/aom/test/encode_test_driver.h b/third_party/aom/test/encode_test_driver.h new file mode 100644 index 0000000000..d1e6615cd7 --- /dev/null +++ b/third_party/aom/test/encode_test_driver.h @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ +#ifndef AOM_TEST_ENCODE_TEST_DRIVER_H_ +#define AOM_TEST_ENCODE_TEST_DRIVER_H_ + +#include <string> +#include <vector> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" + +#if CONFIG_AV1_ENCODER +#include "aom/aomcx.h" +#endif +#include "aom/aom_encoder.h" + +namespace libaom_test { + +class CodecFactory; +class VideoSource; + +enum TestMode { kRealTime, kOnePassGood, kTwoPassGood, kAllIntra }; +#define ALL_TEST_MODES \ + ::testing::Values(::libaom_test::kRealTime, ::libaom_test::kOnePassGood, \ + ::libaom_test::kTwoPassGood) + +#define ONE_PASS_TEST_MODES \ + ::testing::Values(::libaom_test::kRealTime, ::libaom_test::kOnePassGood) + +#define TWO_PASS_TEST_MODES ::testing::Values(::libaom_test::kTwoPassGood) + +#define NONREALTIME_TEST_MODES \ + ::testing::Values(::libaom_test::kOnePassGood, ::libaom_test::kTwoPassGood) + +// Provides an object to handle the libaom get_cx_data() iteration pattern +class CxDataIterator { + public: + explicit CxDataIterator(aom_codec_ctx_t *encoder) + : encoder_(encoder), iter_(nullptr) {} + + const aom_codec_cx_pkt_t *Next() { + return aom_codec_get_cx_data(encoder_, &iter_); + } + + private: + aom_codec_ctx_t *encoder_; + aom_codec_iter_t iter_; +}; + +// Implements an in-memory store for libaom twopass statistics +class TwopassStatsStore { + public: + void Append(const aom_codec_cx_pkt_t &pkt) { + buffer_.append(reinterpret_cast<char *>(pkt.data.twopass_stats.buf), + pkt.data.twopass_stats.sz); + } + + aom_fixed_buf_t buf() { + const aom_fixed_buf_t buf = { &buffer_[0], buffer_.size() }; + return buf; + } + + void Reset() { buffer_.clear(); } + + protected: + std::string buffer_; +}; + +// Provides a simplified interface to manage one video encoding pass, given +// a configuration and video source. +// +// TODO(jkoleszar): The exact services it provides and the appropriate +// level of abstraction will be fleshed out as more tests are written. 
+class Encoder { + public: + Encoder(aom_codec_enc_cfg_t cfg, const aom_codec_flags_t init_flags, + TwopassStatsStore *stats) + : cfg_(cfg), init_flags_(init_flags), stats_(stats) { + memset(&encoder_, 0, sizeof(encoder_)); + } + + virtual ~Encoder() { aom_codec_destroy(&encoder_); } + + CxDataIterator GetCxData() { return CxDataIterator(&encoder_); } + + void InitEncoder(VideoSource *video); + + const aom_image_t *GetPreviewFrame() { + return aom_codec_get_preview_frame(&encoder_); + } + // This is a thin wrapper around aom_codec_encode(), so refer to + // aom_encoder.h for its semantics. + void EncodeFrame(VideoSource *video, aom_enc_frame_flags_t frame_flags); + + // Convenience wrapper for EncodeFrame() + void EncodeFrame(VideoSource *video) { EncodeFrame(video, 0); } + + void Control(int ctrl_id, int arg) { + const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + void Control(int ctrl_id, int *arg) { + const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + void Control(int ctrl_id, struct aom_scaling_mode *arg) { + const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + void Control(int ctrl_id, struct aom_svc_layer_id *arg) { + const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + void Control(int ctrl_id, struct aom_svc_ref_frame_config *arg) { + const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + void Control(int ctrl_id, struct aom_svc_ref_frame_comp_pred *arg) { + const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + void Control(int ctrl_id, struct aom_svc_params *arg) { + const aom_codec_err_t res = 
aom_codec_control(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + void Control(int ctrl_id, struct aom_ext_part_funcs *arg) { + const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + +#if CONFIG_AV1_ENCODER + void Control(int ctrl_id, aom_active_map_t *arg) { + const aom_codec_err_t res = aom_codec_control(&encoder_, ctrl_id, arg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } +#endif + + void SetOption(const char *name, const char *value) { + const aom_codec_err_t res = aom_codec_set_option(&encoder_, name, value); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + } + + void Config(const aom_codec_enc_cfg_t *cfg) { + const aom_codec_err_t res = aom_codec_enc_config_set(&encoder_, cfg); + ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError(); + cfg_ = *cfg; + } + + protected: + virtual aom_codec_iface_t *CodecInterface() const = 0; + + const char *EncoderError() { + const char *detail = aom_codec_error_detail(&encoder_); + return detail ? detail : aom_codec_error(&encoder_); + } + + // Encode an image + void EncodeFrameInternal(const VideoSource &video, + aom_enc_frame_flags_t frame_flags); + + // Flush the encoder on EOS + void Flush(); + + aom_codec_ctx_t encoder_; + aom_codec_enc_cfg_t cfg_; + aom_codec_flags_t init_flags_; + TwopassStatsStore *stats_; +}; + +// Common test functionality for all Encoder tests. +// +// This class is a mixin which provides the main loop common to all +// encoder tests. It provides hooks which can be overridden by subclasses +// to implement each test's specific behavior, while centralizing the bulk +// of the boilerplate. Note that it doesn't inherit the gtest testing +// classes directly, so that tests can be parameterized differently. 
+class EncoderTest { + protected: + explicit EncoderTest(const CodecFactory *codec) + : codec_(codec), abort_(false), init_flags_(0), frame_flags_(0), + mode_(kRealTime) { + // Default to 1 thread. + cfg_.g_threads = 1; + } + + virtual ~EncoderTest() = default; + + // Initialize the cfg_ member with the default configuration for the + // TestMode enum and maps the TestMode enum to the passes_ variable. + void InitializeConfig(TestMode mode); + + // Set encoder flag. + void set_init_flags(aom_codec_flags_t flag) { init_flags_ = flag; } + + // Main loop + virtual void RunLoop(VideoSource *video); + + // Hook to be called at the beginning of a pass. + virtual void BeginPassHook(unsigned int /*pass*/) {} + + // Hook to be called at the end of a pass. + virtual void EndPassHook() {} + + // Hook to be called before encoding a frame. + virtual void PreEncodeFrameHook(VideoSource * /*video*/, + Encoder * /*encoder*/) {} + + virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {} + + // Hook to be called on every compressed data packet. + virtual void FramePktHook(const aom_codec_cx_pkt_t * /*pkt*/) {} + + // Hook to be called on every PSNR packet. + virtual void PSNRPktHook(const aom_codec_cx_pkt_t * /*pkt*/) {} + + // Hook to be called on every first pass stats packet. + virtual void StatsPktHook(const aom_codec_cx_pkt_t * /*pkt*/) {} + + // Calculates SSIM at frame level. + virtual void CalculateFrameLevelSSIM(const aom_image_t * /*img_src*/, + const aom_image_t * /*img_enc*/, + aom_bit_depth_t /*bit_depth*/, + unsigned int /*input_bit_depth*/) {} + + // Hook to determine whether the encode loop should continue. 
+ virtual bool Continue() const { + return !(::testing::Test::HasFatalFailure() || abort_); + } + + // Hook to determine whether to decode frame after encoding + virtual bool DoDecode() const { return true; } + + // Hook to determine whether to decode invisible frames after encoding + virtual bool DoDecodeInvisible() const { return true; } + + // Hook to handle encode/decode mismatch + virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2); + + // Hook to be called on every decompressed frame. + virtual void DecompressedFrameHook(const aom_image_t & /*img*/, + aom_codec_pts_t /*pts*/) {} + + // Hook to be called to handle decode result. Return true to continue. + virtual bool HandleDecodeResult(const aom_codec_err_t res_dec, + Decoder *decoder) { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + return AOM_CODEC_OK == res_dec; + } + + virtual int GetNumSpatialLayers() { return 1; } + + // Hook that can modify the encoder's output data + virtual const aom_codec_cx_pkt_t *MutateEncoderOutputHook( + const aom_codec_cx_pkt_t *pkt) { + return pkt; + } + + const CodecFactory *codec_; + bool abort_; + aom_codec_enc_cfg_t cfg_; + unsigned int passes_; + TwopassStatsStore stats_; + aom_codec_flags_t init_flags_; + aom_enc_frame_flags_t frame_flags_; + TestMode mode_; +}; + +} // namespace libaom_test + +#endif // AOM_TEST_ENCODE_TEST_DRIVER_H_ diff --git a/third_party/aom/test/encodemb_test.cc b/third_party/aom/test/encodemb_test.cc new file mode 100644 index 0000000000..6165fc33f5 --- /dev/null +++ b/third_party/aom/test/encodemb_test.cc @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <stdint.h> +#include <vector> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "av1/encoder/block.h" +#include "av1/encoder/encodemb.h" +#include "av1/common/scan.h" + +namespace { + +// Reorders 'qcoeff_lexico', which is in lexicographic order (row by row), into +// scan order (zigzag) in 'qcoeff_scan'. +void ToScanOrder(TX_SIZE tx_size, TX_TYPE tx_type, tran_low_t *qcoeff_lexico, + tran_low_t *qcoeff_scan) { + const int max_eob = av1_get_max_eob(tx_size); + const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); + for (int i = 0; i < max_eob; ++i) { + qcoeff_scan[i] = qcoeff_lexico[scan_order->scan[i]]; + } +} + +// Reorders 'qcoeff_scan', which is in scan order (zigzag), into lexicographic +// order (row by row) in 'qcoeff_lexico'. +void ToLexicoOrder(TX_SIZE tx_size, TX_TYPE tx_type, tran_low_t *qcoeff_scan, + tran_low_t *qcoeff_lexico) { + const int max_eob = av1_get_max_eob(tx_size); + const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); + for (int i = 0; i < max_eob; ++i) { + qcoeff_lexico[scan_order->scan[i]] = qcoeff_scan[i]; + } +} + +// Runs coefficient dropout on 'qcoeff_scan'. +void Dropout(TX_SIZE tx_size, TX_TYPE tx_type, int dropout_num_before, + int dropout_num_after, tran_low_t *qcoeff_scan) { + tran_low_t qcoeff[MAX_TX_SQUARE]; + // qcoeff_scan is assumed to be in scan order, since tests are easier to + // understand this way, but av1_dropout_qcoeff expects coeffs in lexico order + // so we convert to lexico then back to scan afterwards. 
+ ToLexicoOrder(tx_size, tx_type, qcoeff_scan, qcoeff); + + const int max_eob = av1_get_max_eob(tx_size); + const int kDequantFactor = 10; + tran_low_t dqcoeff[MAX_TX_SQUARE]; + for (int i = 0; i < max_eob; ++i) { + dqcoeff[i] = qcoeff[i] * kDequantFactor; + } + + uint16_t eob = max_eob; + while (eob > 0 && qcoeff_scan[eob - 1] == 0) --eob; + + MACROBLOCK mb; + const int kPlane = 0; + const int kBlock = 0; + memset(&mb, 0, sizeof(mb)); + uint16_t eobs[] = { eob }; + mb.plane[kPlane].eobs = eobs; + mb.plane[kPlane].qcoeff = qcoeff; + mb.plane[kPlane].dqcoeff = dqcoeff; + uint8_t txb_entropy_ctx[1]; + mb.plane[kPlane].txb_entropy_ctx = txb_entropy_ctx; + + av1_dropout_qcoeff_num(&mb, kPlane, kBlock, tx_size, tx_type, + dropout_num_before, dropout_num_after); + + ToScanOrder(tx_size, tx_type, qcoeff, qcoeff_scan); + + // Check updated eob value is valid. + uint16_t new_eob = max_eob; + while (new_eob > 0 && qcoeff_scan[new_eob - 1] == 0) --new_eob; + EXPECT_EQ(new_eob, mb.plane[kPlane].eobs[0]); + + // Check dqcoeff is still valid. + for (int i = 0; i < max_eob; ++i) { + EXPECT_EQ(qcoeff[i] * kDequantFactor, dqcoeff[i]); + } +} + +void ExpectArrayEq(tran_low_t *actual, std::vector<tran_low_t> expected) { + for (size_t i = 0; i < expected.size(); ++i) { + EXPECT_EQ(expected[i], actual[i]) << "Arrays differ at index " << i; + } +} + +static constexpr TX_TYPE kTxType = DCT_DCT; + +TEST(DropoutTest, KeepsLargeCoeffs) { + const TX_SIZE tx_size = TX_8X4; + const uint32_t dropout_num_before = 4; + const uint32_t dropout_num_after = 6; + // Large isolated coeffs should be preserved. 
+ tran_low_t qcoeff_scan[] = { 0, 0, 0, 0, 0, 0, 42, 0, // should be kept + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, -30, // should be kept + 0, 0, 0, 0, 0, 0, 0, 0 }; + Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan); + ExpectArrayEq(qcoeff_scan, { 0, 0, 0, 0, 0, 0, 42, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, -30, // + 0, 0, 0, 0, 0, 0, 0, 0 }); +} + +TEST(DropoutTest, RemovesSmallIsolatedCoeffs) { + const TX_SIZE tx_size = TX_8X4; + const uint32_t dropout_num_before = 4; + const uint32_t dropout_num_after = 6; + // Small isolated coeffs should be removed. + tran_low_t qcoeff_scan[] = { 0, 0, 0, 0, 1, 0, 0, 0, // should be removed + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, -2, 0, 0, 0, // should be removed + 0, 0, 0, 0, 0, 0, 0, 0 }; + Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan); + ExpectArrayEq(qcoeff_scan, { 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0 }); +} + +TEST(DropoutTest, KeepsSmallCoeffsAmongLargeOnes) { + const TX_SIZE tx_size = TX_8X4; + const uint32_t dropout_num_before = 4; + const uint32_t dropout_num_after = 6; + // Small coeffs that are not isolated (not enough zeros before/after should be + // kept). 
+ tran_low_t qcoeff_scan[] = { + 1, 0, 0, 0, -5, 0, 0, -1, // should be kept + 0, 0, 0, 10, 0, 0, 2, 0, // should be kept + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, -2, 0, 0, 0, 0, 0, 0 // should be removed + }; // should be removed + Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan); + ExpectArrayEq(qcoeff_scan, { 1, 0, 0, 0, -5, 0, 0, -1, // + 0, 0, 0, 10, 0, 0, 2, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0 }); +} + +TEST(DropoutTest, KeepsSmallCoeffsCloseToStartOrEnd) { + const TX_SIZE tx_size = TX_8X4; + const uint32_t dropout_num_before = 4; + const uint32_t dropout_num_after = 6; + // Small coeffs that are too close to the beginning or end of the block + // should also be kept (not enough zeroes before/after). + tran_low_t qcoeff_scan[] = { 0, 0, -1, 0, 0, 0, 0, 0, // should be kept + 0, 0, 0, 10, 0, 0, 0, 0, // should be kept + 0, 0, 0, 2, 0, 0, 0, 0, // should be removed + 0, 0, 0, 0, 0, 0, -1, 0 }; // should be kept + Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan); + ExpectArrayEq(qcoeff_scan, { 0, 0, -1, 0, 0, 0, 0, 0, // + 0, 0, 0, 10, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, -1, 0 }); +} + +TEST(DropoutTest, RemovesSmallClusterOfCoeffs) { + const TX_SIZE tx_size = TX_8X4; + const uint32_t dropout_num_before = 4; + const uint32_t dropout_num_after = 6; + // Small clusters (<= kDropoutContinuityMax) of small coeffs should be + // removed. 
+ tran_low_t qcoeff_scan_two[] = { + 0, 0, 0, 0, 1, 0, 0, -1, // should be removed + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 1, 0, // should be removed + 0, 0, 0, 0, 0, 0, 0, 0 + }; + Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, + qcoeff_scan_two); + ExpectArrayEq(qcoeff_scan_two, { 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0 }); +} + +TEST(DropoutTest, KeepsLargeClusterOfCoeffs) { + const TX_SIZE tx_size = TX_8X4; + const uint32_t dropout_num_before = 4; + const uint32_t dropout_num_after = 6; + // Large clusters (> kDropoutContinuityMax) of small coeffs should be kept. + tran_low_t qcoeff_scan[] = { 0, 0, 0, 0, 1, 0, 1, -1, // should be kept + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, -2, 0, 0, // should be removed + 0, 0, 0, 0, 0, 0, 0, 0 }; + Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan); + ExpectArrayEq(qcoeff_scan, { 0, 0, 0, 0, 1, 0, 1, -1, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0 }); +} + +TEST(DropoutTest, NumBeforeLargerThanNumAfter) { + const TX_SIZE tx_size = TX_8X4; + const uint32_t dropout_num_before = 4; + const uint32_t dropout_num_after = 2; + // The second coeff (-2) doesn't seem to meet the dropout_num_before + // criteria. But since the first coeff (1) will be dropped, it will meet + // the criteria and should be dropped too. + tran_low_t qcoeff_scan[] = { 0, 0, 0, 0, 1, 0, 0, 0, // should be removed + -2, 0, 0, 0, 0, 0, 0, 0, // should be removed + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0 }; + Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan); + ExpectArrayEq(qcoeff_scan, { 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0 }); +} + +// More complex test combining other test cases. 
+TEST(DropoutTest, ComplexTest) { + const TX_SIZE tx_size = TX_8X8; + const uint32_t dropout_num_before = 4; + const uint32_t dropout_num_after = 2; + tran_low_t qcoeff_scan[] = { 1, 12, 0, 0, 0, 0, 1, 0, // + 0, 0, 0, -12, 0, 0, 0, 1, // + 0, 0, -2, 0, 1, 0, 0, 1, // + 0, 0, 0, 0, 5, 0, -1, 0, // + 0, 0, 0, 1, 0, 0, 0, -1, // + 0, 0, 0, 0, 2, 0, 0, 0, // + 0, 1, 0, 0, 0, 5, 0, 0, // + 0, 0, 1, 1, 0, 0, 0, -2 }; + Dropout(tx_size, kTxType, dropout_num_before, dropout_num_after, qcoeff_scan); + ExpectArrayEq(qcoeff_scan, { 1, 12, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, -12, 0, 0, 0, 1, // + 0, 0, -2, 0, 1, 0, 0, 1, // + 0, 0, 0, 0, 5, 0, -1, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 5, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, -2 }); +} + +} // namespace diff --git a/third_party/aom/test/encodetxb_test.cc b/third_party/aom/test/encodetxb_test.cc new file mode 100644 index 0000000000..49b0fba94a --- /dev/null +++ b/third_party/aom/test/encodetxb_test.cc @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
+#include "av1/common/av1_common_int.h"
+#include "av1/common/idct.h"
+#include "av1/common/scan.h"
+#include "av1/common/txb_common.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+using libaom_test::ACMRandom;
+
+// Signature shared by av1_get_nz_map_contexts_c and the optimized variants
+// under test.
+typedef void (*GetNzMapContextsFunc)(const uint8_t *const levels,
+                                     const int16_t *const scan,
+                                     const uint16_t eob, const TX_SIZE tx_size,
+                                     const TX_CLASS tx_class,
+                                     int8_t *const coeff_contexts);
+
+// Fixture that runs the parameterized get_nz_map_contexts implementation next
+// to av1_get_nz_map_contexts_c on the same randomized input and compares the
+// resulting coefficient contexts.
+class EncodeTxbTest : public ::testing::TestWithParam<GetNzMapContextsFunc> {
+ public:
+  EncodeTxbTest() : get_nz_map_contexts_func_(GetParam()) {}
+
+  ~EncodeTxbTest() override = default;
+
+  // Allocates the two 16-byte aligned context buffers (reference and tested).
+  void SetUp() override {
+    coeff_contexts_ref_ = reinterpret_cast<int8_t *>(
+        aom_memalign(16, sizeof(*coeff_contexts_ref_) * MAX_TX_SQUARE));
+    ASSERT_NE(coeff_contexts_ref_, nullptr);
+    coeff_contexts_ = reinterpret_cast<int8_t *>(
+        aom_memalign(16, sizeof(*coeff_contexts_) * MAX_TX_SQUARE));
+    ASSERT_NE(coeff_contexts_, nullptr);
+  }
+
+  // Releases both buffers. The pointers start out as nullptr, so this is
+  // well defined even when SetUp() bailed out after the first allocation.
+  void TearDown() override {
+    aom_free(coeff_contexts_ref_);
+    aom_free(coeff_contexts_);
+  }
+
+  // Correctness test: for every transform type and size, and for every eob
+  // from 1 to width * height, compare the tested function with the C
+  // reference. Iteration stops at the first mismatch.
+  void GetNzMapContextsRun() {
+    const int kNumTests = 10;
+    int result = 0;
+
+    // is_inter is not referenced in the loop body; the second pass repeats the
+    // sweep with the random generator further along its sequence.
+    for (int is_inter = 0; is_inter < 2; ++is_inter) {
+      for (int tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+        const TX_CLASS tx_class = tx_type_to_class[tx_type];
+        for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) {
+          const int bhl = get_txb_bhl((TX_SIZE)tx_size);
+          const int width = get_txb_wide((TX_SIZE)tx_size);
+          const int height = get_txb_high((TX_SIZE)tx_size);
+          const int real_width = tx_size_wide[tx_size];
+          const int real_height = tx_size_high[tx_size];
+          const int16_t *const scan = av1_scan_orders[tx_size][tx_type].scan;
+
+          levels_ = set_levels(levels_buf_, height);
+          for (int i = 0; i < kNumTests && !result; ++i) {
+            for (int eob = 1; eob <= width * height && !result; ++eob) {
+              InitDataWithEob(scan, bhl, eob);
+
+              av1_get_nz_map_contexts_c(levels_, scan, eob, (TX_SIZE)tx_size,
+                                        tx_class, coeff_contexts_ref_);
+              get_nz_map_contexts_func_(levels_, scan, eob, (TX_SIZE)tx_size,
+                                        tx_class, coeff_contexts_);
+
+              result = Compare(scan, eob);
+
+              EXPECT_EQ(result, 0)
+                  << " tx_class " << (int)tx_class << " width " << real_width
+                  << " height " << real_height << " eob " << eob;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Speed test: times the C reference and the tested function for DCT_DCT at
+  // the largest eob of each transform size and prints both timings.
+  void SpeedTestGetNzMapContextsRun() {
+    const int kNumTests = 2000000000;
+    aom_usec_timer timer;
+    aom_usec_timer timer_ref;
+
+    printf("Note: Only test the largest possible eob case!\n");
+    for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) {
+      const int bhl = get_txb_bhl((TX_SIZE)tx_size);
+      const int width = get_txb_wide((TX_SIZE)tx_size);
+      const int height = get_txb_high((TX_SIZE)tx_size);
+      const int real_width = tx_size_wide[tx_size];
+      const int real_height = tx_size_high[tx_size];
+      const TX_TYPE tx_type = DCT_DCT;
+      const TX_CLASS tx_class = tx_type_to_class[tx_type];
+      const int16_t *const scan = av1_scan_orders[tx_size][tx_type].scan;
+      const int eob = width * height;
+      // Scale the iteration count so every size processes a similar number of
+      // coefficients.
+      const int numTests = kNumTests / (width * height);
+
+      levels_ = set_levels(levels_buf_, height);
+      InitDataWithEob(scan, bhl, eob);
+
+      aom_usec_timer_start(&timer_ref);
+      for (int i = 0; i < numTests; ++i) {
+        av1_get_nz_map_contexts_c(levels_, scan, eob, (TX_SIZE)tx_size,
+                                  tx_class, coeff_contexts_ref_);
+      }
+      aom_usec_timer_mark(&timer_ref);
+
+      levels_ = set_levels(levels_buf_, height);
+      InitDataWithEob(scan, bhl, eob);
+
+      aom_usec_timer_start(&timer);
+      for (int i = 0; i < numTests; ++i) {
+        get_nz_map_contexts_func_(levels_, scan, eob, (TX_SIZE)tx_size,
+                                  tx_class, coeff_contexts_);
+      }
+      aom_usec_timer_mark(&timer);
+
+      const int elapsed_time_ref =
+          static_cast<int>(aom_usec_timer_elapsed(&timer_ref));
+      const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+      printf("get_nz_map_contexts_%2dx%2d: %7.1f ms ref %7.1f ms gain %4.2f\n",
+             real_width, real_height, elapsed_time / 1000.0,
+             elapsed_time_ref / 1000.0,
+             (elapsed_time_ref * 1.0) / (elapsed_time * 1.0));
+    }
+  }
+
+ private:
+  // Zeroes the level buffer, then writes a random level in [0, INT8_MAX] and a
+  // random starting context for each of the first `eob` scan positions. The
+  // reference context buffer is made an exact copy of the tested one.
+  void InitDataWithEob(const int16_t *const scan, const int bhl,
+                       const int eob) {
+    memset(levels_buf_, 0, sizeof(levels_buf_));
+    memset(coeff_contexts_, 0, sizeof(*coeff_contexts_) * MAX_TX_SQUARE);
+
+    for (int c = 0; c < eob; ++c) {
+      levels_[get_padded_idx(scan[c], bhl)] =
+          static_cast<uint8_t>(clamp(rnd_.Rand8(), 0, INT8_MAX));
+      coeff_contexts_[scan[c]] = static_cast<int8_t>(rnd_.Rand16() >> 1);
+    }
+
+    memcpy(coeff_contexts_ref_, coeff_contexts_,
+           sizeof(*coeff_contexts_) * MAX_TX_SQUARE);
+  }
+
+  // Returns true when the two context buffers differ at one of the first
+  // `eob` scan positions, printing the first such difference. Differences at
+  // positions outside the scanned range are not reported.
+  bool Compare(const int16_t *const scan, const int eob) const {
+    bool result = false;
+    if (memcmp(coeff_contexts_, coeff_contexts_ref_,
+               sizeof(*coeff_contexts_ref_) * MAX_TX_SQUARE)) {
+      for (int i = 0; i < eob; i++) {
+        const int pos = scan[i];
+        if (coeff_contexts_ref_[pos] != coeff_contexts_[pos]) {
+          printf("coeff_contexts_[%d] diff:%6d (ref),%6d (opt)\n", pos,
+                 coeff_contexts_ref_[pos], coeff_contexts_[pos]);
+          result = true;
+          break;
+        }
+      }
+    }
+    return result;
+  }
+
+  GetNzMapContextsFunc get_nz_map_contexts_func_;
+  ACMRandom rnd_;
+  uint8_t levels_buf_[TX_PAD_2D];
+  // Pointers start as nullptr so TearDown() never frees an indeterminate
+  // value when SetUp() fails part-way through.
+  uint8_t *levels_ = nullptr;
+  int8_t *coeff_contexts_ref_ = nullptr;
+  int8_t *coeff_contexts_ = nullptr;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(EncodeTxbTest);
+
+TEST_P(EncodeTxbTest, GetNzMapContexts) { GetNzMapContextsRun(); }
+
+TEST_P(EncodeTxbTest, DISABLED_SpeedTestGetNzMapContexts) {
+  SpeedTestGetNzMapContextsRun();
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(SSE2, EncodeTxbTest,
+                         ::testing::Values(av1_get_nz_map_contexts_sse2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(NEON, EncodeTxbTest,
+                         ::testing::Values(av1_get_nz_map_contexts_neon));
+#endif
+
+// Signature shared by av1_txb_init_levels_c and the optimized variants.
+typedef void (*av1_txb_init_levels_func)(const tran_low_t *const coeff,
+                                         const int width, const int height,
+                                         uint8_t *const levels);
+
+// (function under test, transform size index)
+typedef std::tuple<av1_txb_init_levels_func, int> TxbInitLevelParam;
+
+class EncodeTxbInitLevelTest
+    : public ::testing::TestWithParam<TxbInitLevelParam> {
+ public:
+  ~EncodeTxbInitLevelTest() override = default;
+  void RunTest(av1_txb_init_levels_func test_func, int tx_size, int is_speed);
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(EncodeTxbInitLevelTest);
+
+// Runs av1_txb_init_levels_c and `test_func` on the same random coefficients
+// and asserts that the padded level buffers match. Both level buffers are
+// pre-filled with different random bytes, so any element the two functions do
+// not both write shows up as a mismatch. With `is_speed` set, each function is
+// run repeatedly and the timings are printed.
+void EncodeTxbInitLevelTest::RunTest(av1_txb_init_levels_func test_func,
+                                     int tx_size, int is_speed) {
+  const int width = get_txb_wide((TX_SIZE)tx_size);
+  const int height = get_txb_high((TX_SIZE)tx_size);
+  tran_low_t coeff[MAX_TX_SQUARE];
+
+  uint8_t levels_buf[2][TX_PAD_2D];
+  uint8_t *const levels0 = set_levels(levels_buf[0], height);
+  uint8_t *const levels1 = set_levels(levels_buf[1], height);
+
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int i = 0; i < width * height; i++) {
+    coeff[i] = rnd.Rand16Signed();
+  }
+  for (int i = 0; i < TX_PAD_2D; i++) {
+    levels_buf[0][i] = rnd.Rand8();
+    levels_buf[1][i] = rnd.Rand8();
+  }
+  const int run_times = is_speed ? (width * height) * 10000 : 1;
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_times; ++i) {
+    av1_txb_init_levels_c(coeff, width, height, levels0);
+  }
+  const double t1 = get_time_mark(&timer);
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_times; ++i) {
+    test_func(coeff, width, height, levels1);
+  }
+  const double t2 = get_time_mark(&timer);
+  if (is_speed) {
+    printf("init %3dx%-3d:%7.2f/%7.2fns", width, height, t1, t2);
+    printf("(%3.2f)\n", t1 / t2);
+  }
+  const int stride = width + TX_PAD_HOR;
+  for (int r = 0; r < height + TX_PAD_VER; ++r) {
+    for (int c = 0; c < stride; ++c) {
+      // Label each field: previously run_times and width were streamed back to
+      // back, which made the failure message ambiguous.
+      ASSERT_EQ(levels_buf[0][c + r * stride], levels_buf[1][c + r * stride])
+          << "[" << r << "," << c << "] run_times " << run_times << " size "
+          << width << "x" << height;
+    }
+  }
+}
+
+TEST_P(EncodeTxbInitLevelTest, match) {
+  RunTest(GET_PARAM(0), GET_PARAM(1), 0);
+}
+
+TEST_P(EncodeTxbInitLevelTest, DISABLED_Speed) {
+  RunTest(GET_PARAM(0), GET_PARAM(1), 1);
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, EncodeTxbInitLevelTest,
+    ::testing::Combine(::testing::Values(&av1_txb_init_levels_sse4_1),
+                       ::testing::Range(0, static_cast<int>(TX_SIZES_ALL), 1)));
+#endif
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+    AVX2, EncodeTxbInitLevelTest,
+    ::testing::Combine(::testing::Values(&av1_txb_init_levels_avx2),
+                       ::testing::Range(0, static_cast<int>(TX_SIZES_ALL), 1)));
+#endif
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, EncodeTxbInitLevelTest,
+    ::testing::Combine(::testing::Values(&av1_txb_init_levels_neon),
+                       ::testing::Range(0, static_cast<int>(TX_SIZES_ALL), 1)));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/end_to_end_psnr_test.cc b/third_party/aom/test/end_to_end_psnr_test.cc
new file mode 100644
index 0000000000..687308da8c
--- /dev/null
+++ b/third_party/aom/test/end_to_end_psnr_test.cc
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media.
All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory>
+#include <ostream>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+// Geometry and frame rate passed to YUVVideoSource for non-y4m inputs.
+const unsigned int kWidth = 160;
+const unsigned int kHeight = 90;
+const unsigned int kFramerate = 50;
+// Number of frames encoded per test.
+const unsigned int kFrames = 10;
+const int kBitrate = 500;
+const unsigned int kCqLevel = 18;
+// List of psnr thresholds for speed settings 0-8 and 4 encoding modes
+const double kPsnrThreshold[][4] = {
+  { 34.9, 44.4, 39.5, 41.9 },  { 34.9, 44.4, 39.5, 41.9 },
+  { 34.9, 44.4, 39.4, 41.9 },  { 34.9, 44.4, 39.1, 41.8 },
+  { 34.9, 44.4, 39.1, 41.8 },  { 34.9, 44.29, 38.5, 41.8 },
+  { 34.9, 44.3, 38.5, 41.3 },  { 34.9, 44.3, 38.5, 40.8 },
+  { 34.9, 44.3, 38.5, 40.8 }
+};
+
+// One input clip together with the bit depth, image format and profile used
+// to encode it.
+typedef struct {
+  const char *filename;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+// Streams every TestVideoParam field so failing parameterized tests show a
+// readable parameter.
+std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) {
+  return os << "TestVideoParam { filename:" << test_arg.filename
+            << " input_bit_depth:" << test_arg.input_bit_depth
+            << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth
+            << " profile:" << test_arg.profile << " }";
+}
+
+const TestVideoParam kTestVectors[] = {
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, AOM_BITS_8, 2 },
+  { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
+#if CONFIG_AV1_HIGHBITDEPTH
+  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, AOM_BITS_10, 0 },
+  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, AOM_BITS_10, 2 },
+  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, AOM_BITS_10, 1 },
+  { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, AOM_BITS_12, 2 },
+#endif
+};
+
+// Encoding modes tested
+const libaom_test::TestMode kEncodingModeVectors[] = {
+  ::libaom_test::kTwoPassGood,
+  ::libaom_test::kOnePassGood,
+  ::libaom_test::kRealTime,
+};
+
+// Speed settings tested
+const int kCpuUsedVectors[] = { 1, 2, 3, 5, 6 };
+
+// Encodes kFrames frames of one test clip and checks that the average of the
+// per-packet PSNR values stays above 98% of the threshold for the given
+// (cpu_used, encoding mode) pair.
+// Test parameters: 1 = encoding mode, 2 = test clip, 3 = cpu_used.
+class EndToEndTest
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
+                                                 TestVideoParam, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  EndToEndTest()
+      : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(2)),
+        cpu_used_(GET_PARAM(3)), psnr_(0.0), nframes_(0),
+        encoding_mode_(GET_PARAM(1)) {}
+
+  ~EndToEndTest() override = default;
+
+  // Good-quality modes get a 5-frame lag; real-time mode gets explicit rate
+  // control buffer sizes. Other modes keep the InitializeConfig() defaults.
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    if (encoding_mode_ == ::libaom_test::kOnePassGood ||
+        encoding_mode_ == ::libaom_test::kTwoPassGood) {
+      cfg_.g_lag_in_frames = 5;
+    } else if (encoding_mode_ == ::libaom_test::kRealTime) {
+      cfg_.rc_buf_sz = 1000;
+      cfg_.rc_buf_initial_sz = 500;
+      cfg_.rc_buf_optimal_sz = 600;
+    }
+  }
+
+  // Resets the PSNR accumulator at the start of every pass.
+  void BeginPassHook(unsigned int) override {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  // Accumulates element 0 of each PSNR packet.
+  void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  // Applies the encoder controls once, before the first frame is encoded.
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      // Test screen coding tools at cpu_used = 1 && encoding mode is two-pass.
+      if (cpu_used_ == 1 && encoding_mode_ == ::libaom_test::kTwoPassGood)
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
+      else
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+      if (encoding_mode_ == ::libaom_test::kOnePassGood ||
+          encoding_mode_ == ::libaom_test::kTwoPassGood) {
+        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+      } else if (encoding_mode_ == ::libaom_test::kAllIntra) {
+        encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel);
+      }
+    }
+  }
+
+  // Mean of the accumulated PSNR values, or 0.0 when no packet was seen.
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  // Looks up the threshold by row = cpu_used_ and column = encoding_mode_;
+  // the TestMode enumerator value is used directly as the column index.
+  double GetPsnrThreshold() {
+    return kPsnrThreshold[cpu_used_][encoding_mode_];
+  }
+
+  // Configures the encoder for the test clip, runs the encode loop and checks
+  // the average PSNR against 98% of the threshold.
+  void DoTest() {
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    std::unique_ptr<libaom_test::VideoSource> video;
+    if (is_extension_y4m(test_video_param_.filename)) {
+      video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                  kFrames));
+    } else {
+      video.reset(new libaom_test::YUVVideoSource(
+          test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight,
+          kFramerate, 1, 0, kFrames));
+    }
+    ASSERT_NE(video, nullptr);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, GetPsnrThreshold() * 0.98)
+        << "cpu used = " << cpu_used_ << ", encoding mode = " << encoding_mode_;
+  }
+
+  TestVideoParam test_video_param_;
+  int cpu_used_;
+
+ private:
+  double psnr_;
+  unsigned int nframes_;
+  libaom_test::TestMode encoding_mode_;
+};
+
+// Suites with identical bodies; they differ only in the parameter sets they
+// are instantiated with below.
+class EndToEndTestLarge : public EndToEndTest {};
+
+class EndToEndAllIntraTestLarge : public EndToEndTest {};
+
+class EndToEndAllIntraTest : public EndToEndTest {};
+
+TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) { DoTest(); }
+
+TEST_P(EndToEndTest, EndtoEndPSNRTest) { DoTest(); }
+
+TEST_P(EndToEndAllIntraTestLarge, EndtoEndPSNRTest) { DoTest(); }
+
+TEST_P(EndToEndAllIntraTest, EndtoEndPSNRTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndTestLarge,
+                           ::testing::ValuesIn(kEncodingModeVectors),
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::ValuesIn(kCpuUsedVectors));
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndTest,
+                           ::testing::Values(::libaom_test::kTwoPassGood),
+                           ::testing::Values(kTestVectors[2]),  // 444
+                           ::testing::Values(3));               // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndAllIntraTestLarge,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::Values(2, 4, 6, 8));  // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndAllIntraTest,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(kTestVectors[0]),  // 420
+                           ::testing::Values(6));               // cpu_used
+}  // namespace
diff --git a/third_party/aom/test/end_to_end_qmpsnr_test.cc b/third_party/aom/test/end_to_end_qmpsnr_test.cc
new file mode 100644
index 0000000000..7a755a7a51
--- /dev/null
+++ b/third_party/aom/test/end_to_end_qmpsnr_test.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+// <math.h> supplies the global pow() and <ostream> supplies std::ostream; both
+// are used in this file and were previously only available through other
+// headers.
+#include <math.h>
+
+#include <memory>
+#include <ostream>
+
+#include "aom_ports/mem.h"
+#include "aom_dsp/ssim.h"
+#include "av1/common/blockd.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+// Number of frames encoded per test.
+const unsigned int kFrames = 10;
+const unsigned int kCqLevel = 18;
+// List of ssim thresholds for speed settings 0-8 with all intra encoding mode.
+const double kSsimThreshold[] = { 83.4, 83.4, 83.4, 83.3, 83.3,
+                                  83.0, 82.3, 81.1, 81.1 };
+
+// One input clip together with the bit depth, image format and profile used
+// to encode it.
+typedef struct {
+  const char *filename;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+// Streams every TestVideoParam field so failing parameterized tests show a
+// readable parameter.
+std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) {
+  return os << "TestVideoParam { filename:" << test_arg.filename
+            << " input_bit_depth:" << test_arg.input_bit_depth
+            << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth
+            << " profile:" << test_arg.profile << " }";
+}
+
+const TestVideoParam kTestVectors[] = {
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, AOM_BITS_8, 2 },
+  { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
+#if CONFIG_AV1_HIGHBITDEPTH
+  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, AOM_BITS_10, 0 },
+  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, AOM_BITS_10, 2 },
+  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, AOM_BITS_10, 1 },
+  { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, AOM_BITS_12, 2 },
+#endif
+};
+
+// This class is used to check adherence to given ssim value, while using the
+// "dist-metric=qm-psnr" option.
+// Test parameters: 1 = encoding mode, 2 = test clip, 3 = cpu_used.
+class EndToEndQMPSNRTest
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
+                                                 TestVideoParam, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  EndToEndQMPSNRTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        test_video_param_(GET_PARAM(2)), cpu_used_(GET_PARAM(3)), nframes_(0),
+        ssim_(0.0) {}
+
+  ~EndToEndQMPSNRTest() override = default;
+
+  void SetUp() override { InitializeConfig(encoding_mode_); }
+
+  // Resets the SSIM accumulator at the start of every pass.
+  void BeginPassHook(unsigned int) override {
+    nframes_ = 0;
+    ssim_ = 0.0;
+  }
+
+  // Computes a per-frame SSIM as 0.8 * Y + 0.1 * (U + V) and adds it to the
+  // running sum. The high bit depth path is taken when bit_depth is above
+  // AOM_BITS_8 and CONFIG_AV1_HIGHBITDEPTH is enabled.
+  void CalculateFrameLevelSSIM(const aom_image_t *img_src,
+                               const aom_image_t *img_enc,
+                               aom_bit_depth_t bit_depth,
+                               unsigned int input_bit_depth) override {
+    double frame_ssim;
+    double plane_ssim[MAX_MB_PLANE] = { 0.0, 0.0, 0.0 };
+    int crop_widths[PLANE_TYPES];
+    int crop_heights[PLANE_TYPES];
+    crop_widths[PLANE_TYPE_Y] = img_src->d_w;
+    crop_heights[PLANE_TYPE_Y] = img_src->d_h;
+    // Width of UV planes calculated based on chroma_shift values.
+    // NOTE(review): luma uses d_w/d_h while chroma is derived from w/h;
+    // confirm that mixing the two is intended.
+    crop_widths[PLANE_TYPE_UV] =
+        img_src->x_chroma_shift == 1 ? (img_src->w + 1) >> 1 : img_src->w;
+    crop_heights[PLANE_TYPE_UV] =
+        img_src->y_chroma_shift == 1 ? (img_src->h + 1) >> 1 : img_src->h;
+    nframes_++;
+
+#if CONFIG_AV1_HIGHBITDEPTH
+    uint8_t is_hbd = bit_depth > AOM_BITS_8;
+    if (is_hbd) {
+      // HBD ssim calculation.
+      uint8_t shift = bit_depth - input_bit_depth;
+      for (int i = AOM_PLANE_Y; i < MAX_MB_PLANE; ++i) {
+        const int is_uv = i > AOM_PLANE_Y;
+        plane_ssim[i] = aom_highbd_ssim2(
+            CONVERT_TO_BYTEPTR(img_src->planes[i]),
+            CONVERT_TO_BYTEPTR(img_enc->planes[i]),
+            img_src->stride[is_uv] >> is_hbd, img_enc->stride[is_uv] >> is_hbd,
+            crop_widths[is_uv], crop_heights[is_uv], input_bit_depth, shift);
+      }
+      frame_ssim = plane_ssim[AOM_PLANE_Y] * .8 +
+                   .1 * (plane_ssim[AOM_PLANE_U] + plane_ssim[AOM_PLANE_V]);
+      // Accumulate to find sequence level ssim value.
+      ssim_ += frame_ssim;
+      return;
+    }
+#else
+    (void)bit_depth;
+    (void)input_bit_depth;
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+    // LBD ssim calculation.
+    for (int i = AOM_PLANE_Y; i < MAX_MB_PLANE; ++i) {
+      const int is_uv = i > AOM_PLANE_Y;
+      plane_ssim[i] = aom_ssim2(img_src->planes[i], img_enc->planes[i],
+                                img_src->stride[is_uv], img_enc->stride[is_uv],
+                                crop_widths[is_uv], crop_heights[is_uv]);
+    }
+    frame_ssim = plane_ssim[AOM_PLANE_Y] * .8 +
+                 .1 * (plane_ssim[AOM_PLANE_U] + plane_ssim[AOM_PLANE_V]);
+    // Accumulate to find sequence level ssim value.
+    ssim_ += frame_ssim;
+  }
+
+  // Applies the encoder controls once, before the first frame is encoded.
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AOME_SET_TUNING, AOM_TUNE_SSIM);
+      encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel);
+      encoder->SetOption("dist-metric", "qm-psnr");
+    }
+  }
+
+  // Returns 100 * (mean frame SSIM)^8, or 0.0 when no frame was measured.
+  double GetAverageSsim() const {
+    if (nframes_) return 100 * pow(ssim_ / nframes_, 8.0);
+    return 0.0;
+  }
+
+  // Threshold lookup indexed directly by cpu_used_.
+  double GetSsimThreshold() { return kSsimThreshold[cpu_used_]; }
+
+  // Configures the encoder for the test clip, runs the encode loop and checks
+  // the scaled average SSIM against the threshold.
+  void DoTest() {
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    std::unique_ptr<libaom_test::VideoSource> video(
+        new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                        kFrames));
+    ASSERT_NE(video, nullptr);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double ssim = GetAverageSsim();
+    EXPECT_GT(ssim, GetSsimThreshold())
+        << "encoding mode = " << encoding_mode_ << ", cpu used = " << cpu_used_;
+  }
+
+ private:
+  const libaom_test::TestMode encoding_mode_;
+  const TestVideoParam test_video_param_;
+  const int cpu_used_;
+  unsigned int nframes_;
+  double ssim_;
+};
+
+class EndToEndQMPSNRTestLarge : public EndToEndQMPSNRTest {};
+
+TEST_P(EndToEndQMPSNRTestLarge, EndtoEndQMPSNRTest) { DoTest(); }
+
+TEST_P(EndToEndQMPSNRTest, EndtoEndQMPSNRTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndQMPSNRTestLarge,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::Values(2, 4, 6, 8));  // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndQMPSNRTest,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(kTestVectors[0]),  // 420
+                           ::testing::Values(6));               // cpu_used
+}  // namespace
diff --git a/third_party/aom/test/end_to_end_ssim_test.cc b/third_party/aom/test/end_to_end_ssim_test.cc
new file mode 100644
index 0000000000..f1b0cae75f
--- /dev/null
+++ b/third_party/aom/test/end_to_end_ssim_test.cc
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+// This file uses pow(), std::unique_ptr and std::ostream; include their
+// headers directly instead of relying on other headers to pull them in.
+#include <math.h>
+
+#include <memory>
+#include <ostream>
+
+#include "aom_ports/mem.h"
+#include "aom_dsp/ssim.h"
+#include "av1/common/blockd.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+const unsigned int kFrames = 10;
+const unsigned int kCqLevel = 18;
+// List of ssim thresholds for speed settings 0-8 with all intra encoding mode.
+const double kSsimThreshold[] = {
+  83.4, 83.4, 83.4,  // cpu_used 0-2
+  83.3, 83.3, 83.0,  // cpu_used 3-5
+  82.3, 81.1, 81.1,  // cpu_used 6-8
+};
+
+// Describes a single input clip and how it should be encoded.
+struct TestVideoParam {
+  const char *filename;
+  unsigned int input_bit_depth;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+};
+
+// Writes all fields of `test_arg` to `os` in a single-line, labelled form.
+std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) {
+  os << "TestVideoParam { filename:" << test_arg.filename;
+  os << " input_bit_depth:" << test_arg.input_bit_depth;
+  os << " fmt:" << test_arg.fmt;
+  os << " bit_depth:" << test_arg.bit_depth;
+  os << " profile:" << test_arg.profile << " }";
+  return os;
+}
+
+// Clips exercised by the suites below; entries above 8 bits are only present
+// in high bit depth builds.
+const TestVideoParam kTestVectors[] = {
+  { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
+  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, AOM_BITS_8, 2 },
+  { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
+#if CONFIG_AV1_HIGHBITDEPTH
+  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, AOM_BITS_10, 0 },
+  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, AOM_BITS_10, 2 },
+  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, AOM_BITS_10, 1 },
+  { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, AOM_BITS_12, 2 },
+#endif
+};
+
+// This class is used to check adherence to given ssim value.
+// Test parameters: 1 = encoding mode, 2 = test clip, 3 = cpu_used.
+class EndToEndSSIMTest
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
+                                                 TestVideoParam, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  EndToEndSSIMTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        test_video_param_(GET_PARAM(2)), cpu_used_(GET_PARAM(3)), nframes_(0),
+        ssim_(0.0) {}
+
+  ~EndToEndSSIMTest() override = default;
+
+  void SetUp() override { InitializeConfig(encoding_mode_); }
+
+  // Resets the SSIM accumulator at the start of every pass.
+  void BeginPassHook(unsigned int) override {
+    nframes_ = 0;
+    ssim_ = 0.0;
+  }
+
+  // Computes a per-frame SSIM as 0.8 * Y + 0.1 * (U + V) and adds it to the
+  // running sum. The high bit depth path is taken when bit_depth is above
+  // AOM_BITS_8 and CONFIG_AV1_HIGHBITDEPTH is enabled.
+  void CalculateFrameLevelSSIM(const aom_image_t *img_src,
+                               const aom_image_t *img_enc,
+                               aom_bit_depth_t bit_depth,
+                               unsigned int input_bit_depth) override {
+    double frame_ssim;
+    double plane_ssim[MAX_MB_PLANE] = { 0.0, 0.0, 0.0 };
+    int crop_widths[PLANE_TYPES];
+    int crop_heights[PLANE_TYPES];
+    crop_widths[PLANE_TYPE_Y] = img_src->d_w;
+    crop_heights[PLANE_TYPE_Y] = img_src->d_h;
+    // Width of UV planes calculated based on chroma_shift values.
+    // NOTE(review): luma uses d_w/d_h while chroma is derived from w/h;
+    // confirm that mixing the two is intended.
+    crop_widths[PLANE_TYPE_UV] =
+        img_src->x_chroma_shift == 1 ? (img_src->w + 1) >> 1 : img_src->w;
+    crop_heights[PLANE_TYPE_UV] =
+        img_src->y_chroma_shift == 1 ? (img_src->h + 1) >> 1 : img_src->h;
+    nframes_++;
+
+#if CONFIG_AV1_HIGHBITDEPTH
+    uint8_t is_hbd = bit_depth > AOM_BITS_8;
+    if (is_hbd) {
+      // HBD ssim calculation.
+      uint8_t shift = bit_depth - input_bit_depth;
+      for (int i = AOM_PLANE_Y; i < MAX_MB_PLANE; ++i) {
+        const int is_uv = i > AOM_PLANE_Y;
+        plane_ssim[i] = aom_highbd_ssim2(
+            CONVERT_TO_BYTEPTR(img_src->planes[i]),
+            CONVERT_TO_BYTEPTR(img_enc->planes[i]),
+            img_src->stride[is_uv] >> is_hbd, img_enc->stride[is_uv] >> is_hbd,
+            crop_widths[is_uv], crop_heights[is_uv], input_bit_depth, shift);
+      }
+      frame_ssim = plane_ssim[AOM_PLANE_Y] * .8 +
+                   .1 * (plane_ssim[AOM_PLANE_U] + plane_ssim[AOM_PLANE_V]);
+      // Accumulate to find sequence level ssim value.
+      ssim_ += frame_ssim;
+      return;
+    }
+#else
+    (void)bit_depth;
+    (void)input_bit_depth;
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+
+    // LBD ssim calculation.
+    for (int i = AOM_PLANE_Y; i < MAX_MB_PLANE; ++i) {
+      const int is_uv = i > AOM_PLANE_Y;
+      plane_ssim[i] = aom_ssim2(img_src->planes[i], img_enc->planes[i],
+                                img_src->stride[is_uv], img_enc->stride[is_uv],
+                                crop_widths[is_uv], crop_heights[is_uv]);
+    }
+    frame_ssim = plane_ssim[AOM_PLANE_Y] * .8 +
+                 .1 * (plane_ssim[AOM_PLANE_U] + plane_ssim[AOM_PLANE_V]);
+    // Accumulate to find sequence level ssim value.
+    ssim_ += frame_ssim;
+  }
+
+  // Applies the encoder controls once, before the first frame is encoded.
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() == 0) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      encoder->Control(AOME_SET_TUNING, AOM_TUNE_SSIM);
+      encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel);
+    }
+  }
+
+  // Returns 100 * (mean frame SSIM)^8, or 0.0 when no frame was measured.
+  double GetAverageSsim() const {
+    if (nframes_) return 100 * pow(ssim_ / nframes_, 8.0);
+    return 0.0;
+  }
+
+  // Threshold lookup indexed directly by cpu_used_.
+  double GetSsimThreshold() { return kSsimThreshold[cpu_used_]; }
+
+  // Configures the encoder for the test clip, runs the encode loop and checks
+  // the scaled average SSIM against the threshold.
+  void DoTest() {
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    std::unique_ptr<libaom_test::VideoSource> video(
+        new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                        kFrames));
+    ASSERT_NE(video, nullptr);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double ssim = GetAverageSsim();
+    EXPECT_GT(ssim, GetSsimThreshold())
+        << "encoding mode = " << encoding_mode_ << ", cpu used = " << cpu_used_;
+  }
+
+ private:
+  const libaom_test::TestMode encoding_mode_;
+  const TestVideoParam test_video_param_;
+  const int cpu_used_;
+  unsigned int nframes_;
+  double ssim_;
+};
+
+class EndToEndSSIMTestLarge : public EndToEndSSIMTest {};
+
+TEST_P(EndToEndSSIMTestLarge, EndtoEndSSIMTest) { DoTest(); }
+
+TEST_P(EndToEndSSIMTest, EndtoEndSSIMTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndSSIMTestLarge,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::ValuesIn(kTestVectors),
+                           ::testing::Values(2, 4, 6, 8));  // cpu_used
+
+AV1_INSTANTIATE_TEST_SUITE(EndToEndSSIMTest,
+                           ::testing::Values(::libaom_test::kAllIntra),
+                           ::testing::Values(kTestVectors[0]),  // 420
+                           ::testing::Values(6));               // cpu_used
+}  // namespace
diff --git a/third_party/aom/test/error_block_test.cc b/third_party/aom/test/error_block_test.cc
new file mode 100644
index 0000000000..e7cd870a98
--- /dev/null
+++ b/third_party/aom/test/error_block_test.cc
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include <tuple>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/entropy.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+const int kNumIterations = 1000;
+
+// Common signature every tested function is adapted to.
+using ErrorBlockFunc = int64_t (*)(const tran_low_t *coeff,
+                                   const tran_low_t *dqcoeff,
+                                   intptr_t block_size, int64_t *ssz, int bps);
+
+// Variant without the bit depth argument.
+using ErrorBlockFunc8Bits = int64_t (*)(const tran_low_t *coeff,
+                                        const tran_low_t *dqcoeff,
+                                        intptr_t block_size, int64_t *ssz);
+
+// Variant operating on int16_t coefficients with no ssz output.
+using ErrorBlockLpFunc = int64_t (*)(const int16_t *coeff,
+                                     const int16_t *dqcoeff,
+                                     intptr_t block_size);
+
+// (function under test, reference function, bit depth)
+using ErrorBlockParam =
+    std::tuple<ErrorBlockFunc, ErrorBlockFunc, aom_bit_depth_t>;
+
+// Adapts an ErrorBlockFunc8Bits to the ErrorBlockFunc signature. Expects to be
+// called with bps == 8 only.
+template <ErrorBlockFunc8Bits fn>
+int64_t BlockError8BitWrapper(const tran_low_t *coeff,
+                              const tran_low_t *dqcoeff, intptr_t block_size,
+                              int64_t *ssz, int bps) {
+  EXPECT_EQ(bps, 8);
+  return fn(coeff, dqcoeff, block_size, ssz);
+}
+
+// Adapts an ErrorBlockLpFunc to the ErrorBlockFunc signature. The wrapped
+// function produces no ssz, so *ssz is set to -1; the coefficient buffers are
+// reinterpreted as int16_t arrays.
+template <ErrorBlockLpFunc fn>
+int64_t BlockErrorLpWrapper(const tran_low_t *coeff, const tran_low_t *dqcoeff,
+                            intptr_t block_size, int64_t *ssz, int bps) {
+  EXPECT_EQ(bps, 8);
+  *ssz = -1;
+  return fn(reinterpret_cast<const int16_t *>(coeff),
+            reinterpret_cast<const int16_t *>(dqcoeff), block_size);
+}
+
+// Fixture holding the function under test, its reference and the bit depth
+// taken from the test parameter.
+class ErrorBlockTest : public ::testing::TestWithParam<ErrorBlockParam> {
+ public:
+  ~ErrorBlockTest() override = default;
+  void SetUp() override {
+    error_block_op_ = GET_PARAM(0);
+    ref_error_block_op_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+  }
+
+ protected:
+  aom_bit_depth_t bit_depth_;
+  ErrorBlockFunc error_block_op_;
+  ErrorBlockFunc ref_error_block_op_;
+};
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ErrorBlockTest);
+
+// Compares the tested function with the reference on random same-sign
+// coefficient pairs, for block sizes 16 << (0..8).
+TEST_P(ErrorBlockTest, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  intptr_t block_size;
+  int64_t ssz;
+  int64_t ret;
+  int64_t ref_ssz;
+  int64_t ref_ret;
+  const int msb = bit_depth_ + 8 - 1;
+  for (int i = 0; i < kNumIterations; ++i) {
+    int err_count = 0;
+    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
+    for (int j = 0; j < block_size; j++) {
+      // coeff and dqcoeff will always have at least the same sign, and this
+      // can be used for optimization, so generate test input precisely.
+      if (rnd(2)) {
+        // Positive number
+        coeff[j] = rnd(1 << msb);
+        dqcoeff[j] = rnd(1 << msb);
+      } else {
+        // Negative number
+        coeff[j] = -rnd(1 << msb);
+        dqcoeff[j] = -rnd(1 << msb);
+      }
+    }
+    ref_ret =
+        ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
+    API_REGISTER_STATE_CHECK(
+        ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_));
+    err_count += (ref_ret != ret) | (ref_ssz != ssz);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Error Block Test, C output doesn't match optimized output. "
+      << "First failed at test case " << first_failure;
+}
+
+// Compares the tested function with the reference at the extremes: every
+// combination of 0 and +/-max_val for coeff and dqcoeff, plus a random case.
+TEST_P(ErrorBlockTest, ExtremeValues) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  intptr_t block_size;
+  int64_t ssz;
+  int64_t ret;
+  int64_t ref_ssz;
+  int64_t ref_ret;
+  const int msb = bit_depth_ + 8 - 1;
+  int max_val = ((1 << msb) - 1);
+  for (int i = 0; i < kNumIterations; ++i) {
+    int err_count = 0;
+    // k selects the input pattern and advances once every 9 iterations, so
+    // each pattern is run at all 9 block sizes.
+    int k = (i / 9) % 9;
+
+    // Change the maximum coeff value, to test different bit boundaries
+    if (k == 8 && (i % 9) == 0) {
+      max_val >>= 1;
+    }
+    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
+    for (int j = 0; j < block_size; j++) {
+      if (k < 4) {
+        // Test at positive maximum values
+        coeff[j] = k % 2 ? max_val : 0;
+        dqcoeff[j] = (k >> 1) % 2 ? max_val : 0;
+      } else if (k < 8) {
+        // Test at negative maximum values
+        coeff[j] = k % 2 ? -max_val : 0;
+        dqcoeff[j] = (k >> 1) % 2 ? -max_val : 0;
+      } else {
+        if (rnd(2)) {
+          // Positive number
+          coeff[j] = rnd(1 << 14);
+          dqcoeff[j] = rnd(1 << 14);
+        } else {
+          // Negative number
+          coeff[j] = -rnd(1 << 14);
+          dqcoeff[j] = -rnd(1 << 14);
+        }
+      }
+    }
+    ref_ret =
+        ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
+    API_REGISTER_STATE_CHECK(
+        ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_));
+    err_count += (ref_ret != ret) | (ref_ssz != ssz);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Error Block Test, C output doesn't match optimized output. "
+      << "First failed at test case " << first_failure;
+}
+
+// Times the reference and the tested function for each block size and input
+// pattern and prints both timings with their ratio.
+TEST_P(ErrorBlockTest, DISABLED_Speed) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
+  intptr_t block_size;
+  int64_t ssz;
+  int num_iters = 100000;
+  int64_t ref_ssz;
+  const int msb = bit_depth_ + 8 - 1;
+  for (int i = 0; i < 9; ++i) {
+    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
+    for (int k = 0; k < 9; k++) {
+      for (int j = 0; j < block_size; j++) {
+        if (k < 5) {
+          if (rnd(2)) {
+            // Positive number
+            coeff[j] = rnd(1 << msb);
+            dqcoeff[j] = rnd(1 << msb);
+          } else {
+            // Negative number
+            coeff[j] = -rnd(1 << msb);
+            dqcoeff[j] = -rnd(1 << msb);
+          }
+        } else {
+          if (rnd(2)) {
+            // Positive number
+            coeff[j] = rnd(1 << 14);
+            dqcoeff[j] = rnd(1 << 14);
+          } else {
+            // Negative number
+            coeff[j] = -rnd(1 << 14);
+            dqcoeff[j] = -rnd(1 << 14);
+          }
+        }
+      }
+      aom_usec_timer ref_timer, test_timer;
+
+      aom_usec_timer_start(&ref_timer);
+      for (int iter = 0; iter < num_iters; ++iter) {
+        ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
+      }
+      aom_usec_timer_mark(&ref_timer);
+      const int elapsed_time_c =
+          static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
+
+      aom_usec_timer_start(&test_timer);
+      for (int iter = 0; iter < num_iters; ++iter) {
+        error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_);
+      }
+      aom_usec_timer_mark(&test_timer);
+
+      const int elapsed_time_simd =
+          static_cast<int>(aom_usec_timer_elapsed(&test_timer));
+
+      // Compute the ratio in floating point and guard the divisor: an integer
+      // division here truncated the gain and was undefined for a zero
+      // elapsed time.
+      const double gain =
+          elapsed_time_simd > 0
+              ? static_cast<double>(elapsed_time_c) / elapsed_time_simd
+              : 0.0;
+      printf(
+          " c_time=%d \t simd_time=%d \t "
+          "gain=%.2f \n",
+          elapsed_time_c, elapsed_time_simd, gain);
+    }
+  }
+}
+
+using std::make_tuple;
+
+#if HAVE_SSE2
+const ErrorBlockParam kErrorBlockTestParamsSse2[] = {
+#if CONFIG_AV1_HIGHBITDEPTH
+  make_tuple(&av1_highbd_block_error_sse2, &av1_highbd_block_error_c,
+             AOM_BITS_10),
+  make_tuple(&av1_highbd_block_error_sse2, &av1_highbd_block_error_c,
+ AOM_BITS_12), + make_tuple(&av1_highbd_block_error_sse2, &av1_highbd_block_error_c, + AOM_BITS_8), +#endif + make_tuple(&BlockError8BitWrapper<av1_block_error_sse2>, + &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8), + make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_sse2>, + &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8) +}; + +INSTANTIATE_TEST_SUITE_P(SSE2, ErrorBlockTest, + ::testing::ValuesIn(kErrorBlockTestParamsSse2)); +#endif // HAVE_SSE2 + +#if HAVE_AVX2 +const ErrorBlockParam kErrorBlockTestParamsAvx2[] = { +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(&av1_highbd_block_error_avx2, &av1_highbd_block_error_c, + AOM_BITS_10), + make_tuple(&av1_highbd_block_error_avx2, &av1_highbd_block_error_c, + AOM_BITS_12), + make_tuple(&av1_highbd_block_error_avx2, &av1_highbd_block_error_c, + AOM_BITS_8), +#endif + make_tuple(&BlockError8BitWrapper<av1_block_error_avx2>, + &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8), + make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_avx2>, + &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8) +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, ErrorBlockTest, + ::testing::ValuesIn(kErrorBlockTestParamsAvx2)); +#endif // HAVE_AVX2 + +#if HAVE_NEON +const ErrorBlockParam kErrorBlockTestParamsNeon[] = { +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(&av1_highbd_block_error_neon, &av1_highbd_block_error_c, + AOM_BITS_10), + make_tuple(&av1_highbd_block_error_neon, &av1_highbd_block_error_c, + AOM_BITS_12), + make_tuple(&av1_highbd_block_error_neon, &av1_highbd_block_error_c, + AOM_BITS_8), +#endif + make_tuple(&BlockError8BitWrapper<av1_block_error_neon>, + &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8), + make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_neon>, + &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8) +}; + +INSTANTIATE_TEST_SUITE_P(NEON, ErrorBlockTest, + ::testing::ValuesIn(kErrorBlockTestParamsNeon)); +#endif // HAVE_NEON + +#if HAVE_SVE +const ErrorBlockParam kErrorBlockTestParamsSVE[] = { + 
make_tuple(&BlockError8BitWrapper<av1_block_error_sve>, + &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8), + make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_sve>, + &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8) +}; + +INSTANTIATE_TEST_SUITE_P(SVE, ErrorBlockTest, + ::testing::ValuesIn(kErrorBlockTestParamsSVE)); +#endif // HAVE_SVE +} // namespace diff --git a/third_party/aom/test/error_resilience_test.cc b/third_party/aom/test/error_resilience_test.cc new file mode 100644 index 0000000000..d41884df2b --- /dev/null +++ b/third_party/aom/test/error_resilience_test.cc @@ -0,0 +1,465 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +const int kMaxErrorFrames = 12; +const int kMaxInvisibleErrorFrames = 12; +const int kMaxDroppableFrames = 12; +const int kMaxErrorResilientFrames = 12; +const int kMaxNoMFMVFrames = 12; +const int kMaxPrimRefNoneFrames = 12; +const int kMaxSFrames = 12; +const int kCpuUsed = 1; + +class ErrorResilienceTestLarge + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>, + public ::libaom_test::EncoderTest { + protected: + ErrorResilienceTestLarge() + : EncoderTest(GET_PARAM(0)), psnr_(0.0), nframes_(0), mismatch_psnr_(0.0), + mismatch_nframes_(0), encoding_mode_(GET_PARAM(1)), allow_mismatch_(0), + enable_altref_(GET_PARAM(2)) { + Reset(); + } + + ~ErrorResilienceTestLarge() override = default; + + void Reset() { + error_nframes_ = 0; + invisible_error_nframes_ = 0; + droppable_nframes_ = 0; + error_resilient_nframes_ = 0; + nomfmv_nframes_ = 0; + prim_ref_none_nframes_ = 0; + s_nframes_ = 0; + } + + void SetupEncoder(int bitrate, int lag) { + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = bitrate; + cfg_.kf_mode = AOM_KF_DISABLED; + cfg_.g_lag_in_frames = lag; + init_flags_ = AOM_CODEC_USE_PSNR; + } + + void SetUp() override { InitializeConfig(encoding_mode_); } + + void BeginPassHook(unsigned int /*pass*/) override { + psnr_ = 0.0; + nframes_ = 0; + decoded_nframes_ = 0; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, kCpuUsed); + 
encoder->Control(AOME_SET_ENABLEAUTOALTREF, enable_altref_); + } + frame_flags_ &= + ~(AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF | + AOM_EFLAG_NO_REF_FRAME_MVS | AOM_EFLAG_ERROR_RESILIENT | + AOM_EFLAG_SET_S_FRAME | AOM_EFLAG_SET_PRIMARY_REF_NONE); + if (droppable_nframes_ > 0 && + (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) { + for (unsigned int i = 0; i < droppable_nframes_; ++i) { + if (droppable_frames_[i] == video->frame()) { + std::cout << " Encoding droppable frame: " + << droppable_frames_[i] << "\n"; + frame_flags_ |= (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF); + break; + } + } + } + + if (error_resilient_nframes_ > 0 && + (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) { + for (unsigned int i = 0; i < error_resilient_nframes_; ++i) { + if (error_resilient_frames_[i] == video->frame()) { + std::cout << " Encoding error_resilient frame: " + << error_resilient_frames_[i] << "\n"; + frame_flags_ |= AOM_EFLAG_ERROR_RESILIENT; + break; + } + } + } + + if (nomfmv_nframes_ > 0 && + (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) { + for (unsigned int i = 0; i < nomfmv_nframes_; ++i) { + if (nomfmv_frames_[i] == video->frame()) { + std::cout << " Encoding no mfmv frame: " + << nomfmv_frames_[i] << "\n"; + frame_flags_ |= AOM_EFLAG_NO_REF_FRAME_MVS; + break; + } + } + } + + if (prim_ref_none_nframes_ > 0 && + (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) { + for (unsigned int i = 0; i < prim_ref_none_nframes_; ++i) { + if (prim_ref_none_frames_[i] == video->frame()) { + std::cout << " Encoding no PRIMARY_REF_NONE frame: " + << prim_ref_none_frames_[i] << "\n"; + frame_flags_ |= AOM_EFLAG_SET_PRIMARY_REF_NONE; + break; + } + } + } + + encoder->Control(AV1E_SET_S_FRAME_MODE, 0); + if (s_nframes_ > 0 && + (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) { + for (unsigned int i = 0; i < s_nframes_; ++i) { + 
if (s_frames_[i] == video->frame()) { + std::cout << " Encoding S frame: " << s_frames_[i] + << "\n"; + frame_flags_ |= AOM_EFLAG_SET_S_FRAME; + break; + } + } + } + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + // Check that the encode frame flags are correctly reflected + // in the output frame flags. + const int encode_flags = pkt->data.frame.flags >> 16; + if ((encode_flags & (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF)) == + (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF)) { + ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_DROPPABLE, + AOM_FRAME_IS_DROPPABLE); + } + if (encode_flags & AOM_EFLAG_SET_S_FRAME) { + ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_SWITCH, + AOM_FRAME_IS_SWITCH); + } + if (encode_flags & AOM_EFLAG_ERROR_RESILIENT) { + ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_ERROR_RESILIENT, + AOM_FRAME_IS_ERROR_RESILIENT); + } + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + double GetAverageMismatchPsnr() const { + if (mismatch_nframes_) return mismatch_psnr_ / mismatch_nframes_; + return 0.0; + } + + bool DoDecode() const override { + if (error_nframes_ > 0 && + (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) { + for (unsigned int i = 0; i < error_nframes_; ++i) { + if (error_frames_[i] == nframes_ - 1) { + std::cout << " Skipping decoding frame: " + << error_frames_[i] << "\n"; + return false; + } + } + } + return true; + } + + bool DoDecodeInvisible() const override { + if (invisible_error_nframes_ > 0 && + (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) { + for (unsigned int i = 0; i < invisible_error_nframes_; ++i) { + if (invisible_error_frames_[i] == nframes_ - 1) { + std::cout << " Skipping decoding all invisible frames in " + "frame pkt: " + << invisible_error_frames_[i] << "\n"; + return false; + } + } + } + return true; + } + + void MismatchHook(const aom_image_t *img1, 
const aom_image_t *img2) override { + if (allow_mismatch_) { + double mismatch_psnr = compute_psnr(img1, img2); + mismatch_psnr_ += mismatch_psnr; + ++mismatch_nframes_; + // std::cout << "Mismatch frame psnr: " << mismatch_psnr << "\n"; + } else { + ::libaom_test::EncoderTest::MismatchHook(img1, img2); + } + } + + void DecompressedFrameHook(const aom_image_t &img, + aom_codec_pts_t pts) override { + (void)img; + (void)pts; + ++decoded_nframes_; + } + + void SetErrorFrames(int num, unsigned int *list) { + if (num > kMaxErrorFrames) + num = kMaxErrorFrames; + else if (num < 0) + num = 0; + error_nframes_ = num; + for (unsigned int i = 0; i < error_nframes_; ++i) + error_frames_[i] = list[i]; + } + + void SetInvisibleErrorFrames(int num, unsigned int *list) { + if (num > kMaxInvisibleErrorFrames) + num = kMaxInvisibleErrorFrames; + else if (num < 0) + num = 0; + invisible_error_nframes_ = num; + for (unsigned int i = 0; i < invisible_error_nframes_; ++i) + invisible_error_frames_[i] = list[i]; + } + + void SetDroppableFrames(int num, unsigned int *list) { + if (num > kMaxDroppableFrames) + num = kMaxDroppableFrames; + else if (num < 0) + num = 0; + droppable_nframes_ = num; + for (unsigned int i = 0; i < droppable_nframes_; ++i) + droppable_frames_[i] = list[i]; + } + + void SetErrorResilientFrames(int num, unsigned int *list) { + if (num > kMaxErrorResilientFrames) + num = kMaxErrorResilientFrames; + else if (num < 0) + num = 0; + error_resilient_nframes_ = num; + for (unsigned int i = 0; i < error_resilient_nframes_; ++i) + error_resilient_frames_[i] = list[i]; + } + + void SetNoMFMVFrames(int num, unsigned int *list) { + if (num > kMaxNoMFMVFrames) + num = kMaxNoMFMVFrames; + else if (num < 0) + num = 0; + nomfmv_nframes_ = num; + for (unsigned int i = 0; i < nomfmv_nframes_; ++i) + nomfmv_frames_[i] = list[i]; + } + + void SetPrimaryRefNoneFrames(int num, unsigned int *list) { + if (num > kMaxPrimRefNoneFrames) + num = kMaxPrimRefNoneFrames; + else if (num < 0) + 
num = 0; + prim_ref_none_nframes_ = num; + for (unsigned int i = 0; i < prim_ref_none_nframes_; ++i) + prim_ref_none_frames_[i] = list[i]; + } + + void SetSFrames(int num, unsigned int *list) { + if (num > kMaxSFrames) + num = kMaxSFrames; + else if (num < 0) + num = 0; + s_nframes_ = num; + for (unsigned int i = 0; i < s_nframes_; ++i) s_frames_[i] = list[i]; + } + + unsigned int GetMismatchFrames() { return mismatch_nframes_; } + unsigned int GetEncodedFrames() { return nframes_; } + unsigned int GetDecodedFrames() { return decoded_nframes_; } + + void SetAllowMismatch(int allow) { allow_mismatch_ = allow; } + + private: + double psnr_; + unsigned int nframes_; + unsigned int decoded_nframes_; + unsigned int error_nframes_; + unsigned int invisible_error_nframes_; + unsigned int droppable_nframes_; + unsigned int error_resilient_nframes_; + unsigned int nomfmv_nframes_; + unsigned int prim_ref_none_nframes_; + unsigned int s_nframes_; + double mismatch_psnr_; + unsigned int mismatch_nframes_; + unsigned int error_frames_[kMaxErrorFrames]; + unsigned int invisible_error_frames_[kMaxInvisibleErrorFrames]; + unsigned int droppable_frames_[kMaxDroppableFrames]; + unsigned int error_resilient_frames_[kMaxErrorResilientFrames]; + unsigned int nomfmv_frames_[kMaxNoMFMVFrames]; + unsigned int prim_ref_none_frames_[kMaxPrimRefNoneFrames]; + unsigned int s_frames_[kMaxSFrames]; + libaom_test::TestMode encoding_mode_; + int allow_mismatch_; + int enable_altref_; +}; + +TEST_P(ErrorResilienceTestLarge, OnVersusOff) { + SetupEncoder(2000, 10); + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, 12); + + // Global error resilient mode OFF. 
+ cfg_.g_error_resilient = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_resilience_off = GetAveragePsnr(); + EXPECT_GT(psnr_resilience_off, 25.0); + + Reset(); + // Error resilient mode ON for certain frames + unsigned int num_error_resilient_frames = 5; + unsigned int error_resilient_frame_list[] = { 3, 5, 6, 9, 11 }; + SetErrorResilientFrames(num_error_resilient_frames, + error_resilient_frame_list); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_resilience_on = GetAveragePsnr(); + EXPECT_GT(psnr_resilience_on, 25.0); + + // Test that turning on error resilient mode hurts by 10% at most. + if (psnr_resilience_off > 0.0) { + const double psnr_ratio = psnr_resilience_on / psnr_resilience_off; + EXPECT_GE(psnr_ratio, 0.9); + EXPECT_LE(psnr_ratio, 1.1); + } +} + +// Check for successful decoding and no encoder/decoder mismatch +// if we lose (i.e., drop before decoding) a set of droppable +// frames (i.e., frames that don't update any reference buffers). +TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) { + if (GET_PARAM(1) == ::libaom_test::kOnePassGood && GET_PARAM(2) == 1) { + fprintf(stderr, "Skipping test case #1 because of bug aomedia:3002\n"); + return; + } + SetupEncoder(500, 10); + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, 20); + + // Set an arbitrary set of error frames same as droppable frames. 
+ unsigned int num_droppable_frames = 3; + unsigned int droppable_frame_list[] = { 5, 11, 13 }; + SetDroppableFrames(num_droppable_frames, droppable_frame_list); + SetErrorFrames(num_droppable_frames, droppable_frame_list); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // Test that no mismatches have been found + std::cout << " Encoded frames: " << GetEncodedFrames() << "\n"; + std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_droppable_frames); +} + +// Check for ParseAbility property of an error-resilient frame. +// Encode a frame in error-resilient mode (E-frame), and disallow all +// subsequent frames from using MFMV. If frames are dropped before the +// E frame, all frames starting from the E frame should be parse-able. +TEST_P(ErrorResilienceTestLarge, ParseAbilityTest) { + SetupEncoder(500, 10); + + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, 15); + + SetAllowMismatch(1); + + // Note that an E-frame cannot be forced on a frame that is a + // show_existing_frame, or a frame that comes directly after an invisible + // frame. Currently, this will cause an assertion failure. 
+ // Set an arbitrary error resilient (E) frame + unsigned int num_error_resilient_frames = 1; + unsigned int error_resilient_frame_list[] = { 8 }; + SetErrorResilientFrames(num_error_resilient_frames, + error_resilient_frame_list); + // Ensure that any invisible frames before the E frame are dropped + SetInvisibleErrorFrames(num_error_resilient_frames, + error_resilient_frame_list); + // Set all frames after the error resilient frame to not allow MFMV + unsigned int num_post_error_resilient_frames = 6; + unsigned int post_error_resilient_frame_list[] = { 9, 10, 11, 12, 13, 14 }; + SetNoMFMVFrames(num_post_error_resilient_frames, + post_error_resilient_frame_list); + + // Set a few frames before the E frame that are lost (not decoded) + unsigned int num_error_frames = 5; + unsigned int error_frame_list[] = { 3, 4, 5, 6, 7 }; + SetErrorFrames(num_error_frames, error_frame_list); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + std::cout << " Encoded frames: " << GetEncodedFrames() << "\n"; + std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_error_frames); + // All frames following the E-frame and the E-frame are expected to have + // mismatches, but still be parse-able. + EXPECT_LE(GetMismatchFrames(), num_post_error_resilient_frames + 1); +} + +// Check for ParseAbility property of an S frame. +// Encode an S-frame. If frames are dropped before the S-frame, all frames +// starting from the S frame should be parse-able. +TEST_P(ErrorResilienceTestLarge, SFrameTest) { + SetupEncoder(500, 10); + + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, 15); + + SetAllowMismatch(1); + + // Note that an S-frame cannot be forced on a frame that is a + // show_existing_frame. This issue still needs to be addressed. 
+ // Set an arbitrary S-frame + unsigned int num_s_frames = 1; + unsigned int s_frame_list[] = { 6 }; + SetSFrames(num_s_frames, s_frame_list); + // Ensure that any invisible frames before the S frame are dropped + SetInvisibleErrorFrames(num_s_frames, s_frame_list); + + // Set a few frames before the S frame that are lost (not decoded) + unsigned int num_error_frames = 4; + unsigned int error_frame_list[] = { 2, 3, 4, 5 }; + SetErrorFrames(num_error_frames, error_frame_list); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + std::cout << " Encoded frames: " << GetEncodedFrames() << "\n"; + std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_error_frames); + // All frames following the S-frame and the S-frame are expected to have + // mismatches, but still be parse-able. + EXPECT_LE(GetMismatchFrames(), GetEncodedFrames() - s_frame_list[0]); +} + +AV1_INSTANTIATE_TEST_SUITE(ErrorResilienceTestLarge, NONREALTIME_TEST_MODES, + ::testing::Values(0, 1)); +} // namespace diff --git a/third_party/aom/test/ethread_test.cc b/third_party/aom/test/ethread_test.cc new file mode 100644 index 0000000000..ce45394eb8 --- /dev/null +++ b/third_party/aom/test/ethread_test.cc @@ -0,0 +1,577 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <string> +#include <vector> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/md5_helper.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" +#include "av1/encoder/firstpass.h" + +namespace { +const unsigned int kCqLevel = 18; + +#if !CONFIG_REALTIME_ONLY +const size_t kFirstPassStatsSz = sizeof(FIRSTPASS_STATS); +class AVxFirstPassEncoderThreadTest + : public ::libaom_test::CodecTestWith4Params<libaom_test::TestMode, int, + int, int>, + public ::libaom_test::EncoderTest { + protected: + AVxFirstPassEncoderThreadTest() + : EncoderTest(GET_PARAM(0)), encoder_initialized_(false), + encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)), + tile_rows_(GET_PARAM(3)), tile_cols_(GET_PARAM(4)) { + init_flags_ = AOM_CODEC_USE_PSNR; + + row_mt_ = 1; + firstpass_stats_.buf = nullptr; + firstpass_stats_.sz = 0; + } + ~AVxFirstPassEncoderThreadTest() override { free(firstpass_stats_.buf); } + + void SetUp() override { + InitializeConfig(encoding_mode_); + + cfg_.g_lag_in_frames = 35; + cfg_.rc_end_usage = AOM_VBR; + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_max_quantizer = 56; + cfg_.rc_min_quantizer = 0; + } + + void BeginPassHook(unsigned int /*pass*/) override { + encoder_initialized_ = false; + abort_ = false; + } + + void EndPassHook() override { + // For first pass stats test, only run first pass encoder. + if (cfg_.g_pass == AOM_RC_FIRST_PASS) abort_ = true; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource * /*video*/, + ::libaom_test::Encoder *encoder) override { + if (!encoder_initialized_) { + // Encode in 2-pass mode. 
+ SetTileSize(encoder); + encoder->Control(AV1E_SET_ROW_MT, row_mt_); + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 0); + + encoder_initialized_ = true; + } + } + + virtual void SetTileSize(libaom_test::Encoder *encoder) { + encoder->Control(AV1E_SET_TILE_COLUMNS, tile_cols_); + encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_); + } + + void StatsPktHook(const aom_codec_cx_pkt_t *pkt) override { + const uint8_t *const pkt_buf = + reinterpret_cast<uint8_t *>(pkt->data.twopass_stats.buf); + const size_t pkt_size = pkt->data.twopass_stats.sz; + + // First pass stats size equals sizeof(FIRSTPASS_STATS) + EXPECT_EQ(pkt_size, kFirstPassStatsSz) + << "Error: First pass stats size doesn't equal kFirstPassStatsSz"; + + firstpass_stats_.buf = + realloc(firstpass_stats_.buf, firstpass_stats_.sz + pkt_size); + ASSERT_NE(firstpass_stats_.buf, nullptr); + memcpy((uint8_t *)firstpass_stats_.buf + firstpass_stats_.sz, pkt_buf, + pkt_size); + firstpass_stats_.sz += pkt_size; + } + + bool encoder_initialized_; + ::libaom_test::TestMode encoding_mode_; + int set_cpu_used_; + int tile_rows_; + int tile_cols_; + int row_mt_; + aom_fixed_buf_t firstpass_stats_; +}; + +static void compare_fp_stats_md5(aom_fixed_buf_t *fp_stats) { + // fp_stats consists of 2 set of first pass encoding stats. These 2 set of + // stats are compared to check if the stats match. + uint8_t *stats1 = reinterpret_cast<uint8_t *>(fp_stats->buf); + uint8_t *stats2 = stats1 + fp_stats->sz / 2; + ::libaom_test::MD5 md5_row_mt_0, md5_row_mt_1; + + md5_row_mt_0.Add(stats1, fp_stats->sz / 2); + const char *md5_row_mt_0_str = md5_row_mt_0.Get(); + + md5_row_mt_1.Add(stats2, fp_stats->sz / 2); + const char *md5_row_mt_1_str = md5_row_mt_1.Get(); + + // Check md5 match. 
+ ASSERT_STREQ(md5_row_mt_0_str, md5_row_mt_1_str) + << "MD5 checksums don't match"; +} + +TEST_P(AVxFirstPassEncoderThreadTest, FirstPassStatsTest) { + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + aom_fixed_buf_t firstpass_stats; + size_t single_run_sz; + + cfg_.rc_target_bitrate = 1000; + + // 5 encodes will be run: + // 1. row_mt_=0 and threads=1 + // 2. row_mt_=1 and threads=1 + // 3. row_mt_=1 and threads=2 + // 4. row_mt_=1 and threads=4 + // 5. row_mt_=1 and threads=8 + + // 4 comparisons will be made: + // 1. Between run 1 and run 2. + // 2. Between run 2 and run 3. + // 3. Between run 3 and run 4. + // 4. Between run 4 and run 5. + + // Test row_mt_: 0 vs 1 at single thread case(threads = 1) + cfg_.g_threads = 1; + + row_mt_ = 0; + init_flags_ = AOM_CODEC_USE_PSNR; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + row_mt_ = 1; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + firstpass_stats.buf = firstpass_stats_.buf; + firstpass_stats.sz = firstpass_stats_.sz; + single_run_sz = firstpass_stats_.sz / 2; + + // Compare to check if using or not using row-mt are bit exact. + // Comparison 1 (between row_mt_=0 and row_mt_=1). + ASSERT_NO_FATAL_FAILURE(compare_fp_stats_md5(&firstpass_stats)); + + // Test single thread vs multiple threads + row_mt_ = 1; + + cfg_.g_threads = 2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // offset to the 2nd and 3rd run. + firstpass_stats.buf = reinterpret_cast<void *>( + reinterpret_cast<uint8_t *>(firstpass_stats_.buf) + single_run_sz); + + // Compare to check if single-thread and multi-thread stats are bit exact. + // Comparison 2 (between threads=1 and threads=2). 
+ ASSERT_NO_FATAL_FAILURE(compare_fp_stats_md5(&firstpass_stats)); + + cfg_.g_threads = 4; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // offset to the 3rd and 4th run + firstpass_stats.buf = reinterpret_cast<void *>( + reinterpret_cast<uint8_t *>(firstpass_stats_.buf) + single_run_sz * 2); + + // Comparison 3 (between threads=2 and threads=4). + ASSERT_NO_FATAL_FAILURE(compare_fp_stats_md5(&firstpass_stats)); + + cfg_.g_threads = 8; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // offset to the 4th and 5th run. + firstpass_stats.buf = reinterpret_cast<void *>( + reinterpret_cast<uint8_t *>(firstpass_stats_.buf) + single_run_sz * 3); + + // Comparison 4 (between threads=4 and threads=8). + compare_fp_stats_md5(&firstpass_stats); +} +#endif // !CONFIG_REALTIME_ONLY + +class AVxEncoderThreadTest + : public ::libaom_test::CodecTestWith5Params<libaom_test::TestMode, int, + int, int, int>, + public ::libaom_test::EncoderTest { + protected: + AVxEncoderThreadTest() + : EncoderTest(GET_PARAM(0)), encoder_initialized_(false), + encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)), + tile_cols_(GET_PARAM(3)), tile_rows_(GET_PARAM(4)), + row_mt_(GET_PARAM(5)) { + init_flags_ = AOM_CODEC_USE_PSNR; + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.w = 1280; + cfg.h = 720; + cfg.allow_lowbitdepth = 1; + decoder_ = codec_->CreateDecoder(cfg, 0); + if (decoder_->IsAV1()) { + decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1); + decoder_->Control(AV1_SET_DECODE_TILE_COL, -1); + } + + size_enc_.clear(); + md5_dec_.clear(); + md5_enc_.clear(); + } + ~AVxEncoderThreadTest() override { delete decoder_; } + + void SetUp() override { + InitializeConfig(encoding_mode_); + + if (encoding_mode_ == ::libaom_test::kOnePassGood || + encoding_mode_ == ::libaom_test::kTwoPassGood) { + cfg_.g_lag_in_frames = 6; + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + } else if (encoding_mode_ == ::libaom_test::kRealTime) { + cfg_.g_error_resilient = 1; + 
} + cfg_.rc_max_quantizer = 56; + cfg_.rc_min_quantizer = 0; + } + + void BeginPassHook(unsigned int /*pass*/) override { + encoder_initialized_ = false; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource * /*video*/, + ::libaom_test::Encoder *encoder) override { + if (!encoder_initialized_) { + SetTileSize(encoder); + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AV1E_SET_ROW_MT, row_mt_); + if (encoding_mode_ == ::libaom_test::kOnePassGood || + encoding_mode_ == ::libaom_test::kTwoPassGood) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 5); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 0); + encoder->Control(AV1E_SET_MAX_GF_INTERVAL, 4); + // In row_mt_=0 case, the output of single thread (1 thread) will be + // compared with multi thread (4 thread) output (as per line no:340). + // Currently, Loop restoration stage is conditionally disabled for speed + // 5, 6 when num_workers > 1. Due to this, the match between single + // thread and multi thread output can not be achieved. Hence, testing + // this case alone with LR disabled. + // TODO(aomedia:3446): Remove the constraint on this test case once Loop + // restoration state is same in both single and multi thread path. 
+ if (set_cpu_used_ >= 5 && row_mt_ == 0) + encoder->Control(AV1E_SET_ENABLE_RESTORATION, 0); + } else if (encoding_mode_ == ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0); + encoder->Control(AV1E_SET_AQ_MODE, 3); + encoder->Control(AV1E_SET_COEFF_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_MODE_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_MV_COST_UPD_FREQ, 3); + encoder->Control(AV1E_SET_DV_COST_UPD_FREQ, 3); + } else { + encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel); + } + encoder_initialized_ = true; + } + } + + virtual void SetTileSize(libaom_test::Encoder *encoder) { + encoder->Control(AV1E_SET_TILE_COLUMNS, tile_cols_); + encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_); + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + size_enc_.push_back(pkt->data.frame.sz); + + ::libaom_test::MD5 md5_enc; + md5_enc.Add(reinterpret_cast<uint8_t *>(pkt->data.frame.buf), + pkt->data.frame.sz); + md5_enc_.push_back(md5_enc.Get()); + + const aom_codec_err_t res = decoder_->DecodeFrame( + reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz); + if (res != AOM_CODEC_OK) { + abort_ = true; + ASSERT_EQ(AOM_CODEC_OK, res); + } + const aom_image_t *img = decoder_->GetDxData().Next(); + + if (img) { + ::libaom_test::MD5 md5_res; + md5_res.Add(img); + md5_dec_.push_back(md5_res.Get()); + } + } + + void DoTest() { + ::libaom_test::YUVVideoSource video( + "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, 640, 480, 30, 1, 15, 26); + cfg_.rc_target_bitrate = 1000; + + if (row_mt_ == 0) { + // Encode using single thread. 
+ cfg_.g_threads = 1; + init_flags_ = AOM_CODEC_USE_PSNR; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + std::vector<size_t> single_thr_size_enc; + std::vector<std::string> single_thr_md5_enc; + std::vector<std::string> single_thr_md5_dec; + single_thr_size_enc = size_enc_; + single_thr_md5_enc = md5_enc_; + single_thr_md5_dec = md5_dec_; + size_enc_.clear(); + md5_enc_.clear(); + md5_dec_.clear(); + + // Encode using multiple threads. + cfg_.g_threads = 4; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + std::vector<size_t> multi_thr_size_enc; + std::vector<std::string> multi_thr_md5_enc; + std::vector<std::string> multi_thr_md5_dec; + multi_thr_size_enc = size_enc_; + multi_thr_md5_enc = md5_enc_; + multi_thr_md5_dec = md5_dec_; + size_enc_.clear(); + md5_enc_.clear(); + md5_dec_.clear(); + + // Check that the vectors are equal. + ASSERT_EQ(single_thr_size_enc, multi_thr_size_enc); + ASSERT_EQ(single_thr_md5_enc, multi_thr_md5_enc); + ASSERT_EQ(single_thr_md5_dec, multi_thr_md5_dec); + + DoTestMaxThreads(&video, single_thr_size_enc, single_thr_md5_enc, + single_thr_md5_dec); + } else if (row_mt_ == 1) { + // Encode using multiple threads row-mt enabled. + cfg_.g_threads = 2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + std::vector<size_t> multi_thr2_row_mt_size_enc; + std::vector<std::string> multi_thr2_row_mt_md5_enc; + std::vector<std::string> multi_thr2_row_mt_md5_dec; + multi_thr2_row_mt_size_enc = size_enc_; + multi_thr2_row_mt_md5_enc = md5_enc_; + multi_thr2_row_mt_md5_dec = md5_dec_; + size_enc_.clear(); + md5_enc_.clear(); + md5_dec_.clear(); + + // Disable threads=3 test for now to reduce the time so that the nightly + // test would not time out. 
+ // cfg_.g_threads = 3; + // ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // std::vector<size_t> multi_thr3_row_mt_size_enc; + // std::vector<std::string> multi_thr3_row_mt_md5_enc; + // std::vector<std::string> multi_thr3_row_mt_md5_dec; + // multi_thr3_row_mt_size_enc = size_enc_; + // multi_thr3_row_mt_md5_enc = md5_enc_; + // multi_thr3_row_mt_md5_dec = md5_dec_; + // size_enc_.clear(); + // md5_enc_.clear(); + // md5_dec_.clear(); + // Check that the vectors are equal. + // ASSERT_EQ(multi_thr3_row_mt_size_enc, multi_thr2_row_mt_size_enc); + // ASSERT_EQ(multi_thr3_row_mt_md5_enc, multi_thr2_row_mt_md5_enc); + // ASSERT_EQ(multi_thr3_row_mt_md5_dec, multi_thr2_row_mt_md5_dec); + + cfg_.g_threads = 4; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + std::vector<size_t> multi_thr4_row_mt_size_enc; + std::vector<std::string> multi_thr4_row_mt_md5_enc; + std::vector<std::string> multi_thr4_row_mt_md5_dec; + multi_thr4_row_mt_size_enc = size_enc_; + multi_thr4_row_mt_md5_enc = md5_enc_; + multi_thr4_row_mt_md5_dec = md5_dec_; + size_enc_.clear(); + md5_enc_.clear(); + md5_dec_.clear(); + + // Check that the vectors are equal. 
+ ASSERT_EQ(multi_thr4_row_mt_size_enc, multi_thr2_row_mt_size_enc); + ASSERT_EQ(multi_thr4_row_mt_md5_enc, multi_thr2_row_mt_md5_enc); + ASSERT_EQ(multi_thr4_row_mt_md5_dec, multi_thr2_row_mt_md5_dec); + + DoTestMaxThreads(&video, multi_thr2_row_mt_size_enc, + multi_thr2_row_mt_md5_enc, multi_thr2_row_mt_md5_dec); + } + } + + virtual void DoTestMaxThreads(::libaom_test::YUVVideoSource *video, + const std::vector<size_t> ref_size_enc, + const std::vector<std::string> ref_md5_enc, + const std::vector<std::string> ref_md5_dec) { + // This value should be kept the same as MAX_NUM_THREADS + // in aom_thread.h + cfg_.g_threads = 64; + ASSERT_NO_FATAL_FAILURE(RunLoop(video)); + std::vector<size_t> multi_thr_max_row_mt_size_enc; + std::vector<std::string> multi_thr_max_row_mt_md5_enc; + std::vector<std::string> multi_thr_max_row_mt_md5_dec; + multi_thr_max_row_mt_size_enc = size_enc_; + multi_thr_max_row_mt_md5_enc = md5_enc_; + multi_thr_max_row_mt_md5_dec = md5_dec_; + size_enc_.clear(); + md5_enc_.clear(); + md5_dec_.clear(); + + // Check that the vectors are equal. + ASSERT_EQ(ref_size_enc, multi_thr_max_row_mt_size_enc); + ASSERT_EQ(ref_md5_enc, multi_thr_max_row_mt_md5_enc); + ASSERT_EQ(ref_md5_dec, multi_thr_max_row_mt_md5_dec); + } + + bool encoder_initialized_; + ::libaom_test::TestMode encoding_mode_; + int set_cpu_used_; + int tile_cols_; + int tile_rows_; + int row_mt_; + ::libaom_test::Decoder *decoder_; + std::vector<size_t> size_enc_; + std::vector<std::string> md5_enc_; + std::vector<std::string> md5_dec_; +}; + +class AVxEncoderThreadRTTest : public AVxEncoderThreadTest {}; + +TEST_P(AVxEncoderThreadRTTest, EncoderResultTest) { + cfg_.large_scale_tile = 0; + decoder_->Control(AV1_SET_TILE_MODE, 0); + DoTest(); +} + +// For real time mode, test speed 5, 6, 7, 8, 9, 10. 
+AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadRTTest, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Values(5, 6, 7, 8, 9, 10), + ::testing::Values(0, 2), ::testing::Values(0, 2), + ::testing::Values(0, 1)); + +#if !CONFIG_REALTIME_ONLY + +// The AVxEncoderThreadTestLarge takes up ~14% of total run-time of the +// Valgrind long tests. Exclude it; the smaller tests are still run. +#if !AOM_VALGRIND_BUILD +class AVxEncoderThreadTestLarge : public AVxEncoderThreadTest {}; + +TEST_P(AVxEncoderThreadTestLarge, EncoderResultTest) { + cfg_.large_scale_tile = 0; + decoder_->Control(AV1_SET_TILE_MODE, 0); + DoTest(); +} + +// Test cpu_used 0, 1, 3 and 5. +AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadTestLarge, + ::testing::Values(::libaom_test::kTwoPassGood, + ::libaom_test::kOnePassGood), + ::testing::Values(0, 1, 3, 5), + ::testing::Values(1, 6), ::testing::Values(1, 6), + ::testing::Values(0, 1)); +#endif // !AOM_VALGRIND_BUILD + +TEST_P(AVxEncoderThreadTest, EncoderResultTest) { + cfg_.large_scale_tile = 0; + decoder_->Control(AV1_SET_TILE_MODE, 0); + DoTest(); +} + +class AVxEncoderThreadAllIntraTest : public AVxEncoderThreadTest {}; + +TEST_P(AVxEncoderThreadAllIntraTest, EncoderResultTest) { + cfg_.large_scale_tile = 0; + decoder_->Control(AV1_SET_TILE_MODE, 0); + DoTest(); +} + +class AVxEncoderThreadAllIntraTestLarge : public AVxEncoderThreadTest {}; + +TEST_P(AVxEncoderThreadAllIntraTestLarge, EncoderResultTest) { + cfg_.large_scale_tile = 0; + decoder_->Control(AV1_SET_TILE_MODE, 0); + DoTest(); +} + +// first pass stats test +AV1_INSTANTIATE_TEST_SUITE(AVxFirstPassEncoderThreadTest, + ::testing::Values(::libaom_test::kTwoPassGood), + ::testing::Range(0, 6, 2), ::testing::Range(0, 2), + ::testing::Range(1, 3)); + +// For AV1, test speed 0, 1, 2, 3, 5. +// Only test cpu_used 2 here. 
+AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadTest, + ::testing::Values(::libaom_test::kTwoPassGood), + ::testing::Values(2), ::testing::Values(0, 2), + ::testing::Values(0, 2), ::testing::Values(0, 1)); + +// For all intra mode, test speed 0, 2, 4, 6, 8. +// Only test cpu_used 6 here. +AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadAllIntraTest, + ::testing::Values(::libaom_test::kAllIntra), + ::testing::Values(6), ::testing::Values(0, 2), + ::testing::Values(0, 2), ::testing::Values(0, 1)); + +// Test cpu_used 0, 2, 4 and 8. +AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadAllIntraTestLarge, + ::testing::Values(::libaom_test::kAllIntra), + ::testing::Values(0, 2, 4, 8), + ::testing::Values(1, 6), ::testing::Values(1, 6), + ::testing::Values(0, 1)); +#endif // !CONFIG_REALTIME_ONLY + +class AVxEncoderThreadLSTest : public AVxEncoderThreadTest { + void SetTileSize(libaom_test::Encoder *encoder) override { + encoder->Control(AV1E_SET_TILE_COLUMNS, tile_cols_); + encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_); + } + + void DoTestMaxThreads(::libaom_test::YUVVideoSource *video, + const std::vector<size_t> ref_size_enc, + const std::vector<std::string> ref_md5_enc, + const std::vector<std::string> ref_md5_dec) override { + (void)video; + (void)ref_size_enc; + (void)ref_md5_enc; + (void)ref_md5_dec; + } +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AVxEncoderThreadLSTest); + +TEST_P(AVxEncoderThreadLSTest, EncoderResultTest) { + cfg_.large_scale_tile = 1; + decoder_->Control(AV1_SET_TILE_MODE, 1); + decoder_->Control(AV1D_EXT_TILE_DEBUG, 1); + DoTest(); +} + +// AVxEncoderThreadLSTestLarge takes up about 2% of total run-time of +// the Valgrind long tests. Since we already run AVxEncoderThreadLSTest, +// skip this one for Valgrind. 
+#if !CONFIG_REALTIME_ONLY && !AOM_VALGRIND_BUILD +class AVxEncoderThreadLSTestLarge : public AVxEncoderThreadLSTest {}; + +TEST_P(AVxEncoderThreadLSTestLarge, EncoderResultTest) { + cfg_.large_scale_tile = 1; + decoder_->Control(AV1_SET_TILE_MODE, 1); + decoder_->Control(AV1D_EXT_TILE_DEBUG, 1); + DoTest(); +} + +AV1_INSTANTIATE_TEST_SUITE(AVxEncoderThreadLSTestLarge, + ::testing::Values(::libaom_test::kTwoPassGood, + ::libaom_test::kOnePassGood), + ::testing::Values(1, 3), ::testing::Values(0, 6), + ::testing::Values(0, 6), ::testing::Values(1)); +#endif // !CONFIG_REALTIME_ONLY && !AOM_VALGRIND_BUILD +} // namespace diff --git a/third_party/aom/test/examples.sh b/third_party/aom/test/examples.sh new file mode 100755 index 0000000000..3e1612303c --- /dev/null +++ b/third_party/aom/test/examples.sh @@ -0,0 +1,37 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file runs all of the tests for the libaom examples. +## +readonly EXEC_DIR="$(pwd)" +. $(dirname $0)/tools_common.sh + +example_tests=$(ls -r $(dirname $0)/*.sh) + +# List of script names to exclude. +exclude_list="best_encode examples run_encodes tools_common" + +if [ "$(realtime_only_build)" = "yes" ]; then + exclude_list="${exclude_list} twopass_encoder simple_decoder lightfield_test" +fi + +# Filter out the scripts in $exclude_list. 
+for word in ${exclude_list}; do + example_tests=$(filter_strings "${example_tests}" "${word}" exclude) +done + +for test in ${example_tests}; do + # Source each test script so that exporting variables can be avoided. + AOM_TEST_NAME="$(basename ${test%.*})" + . "${test}" + # Restore the working directory to the one at the beginning of execution. + # This avoids side-effects from tests that change the directory. + cd "${EXEC_DIR}" +done diff --git a/third_party/aom/test/external_frame_buffer_test.cc b/third_party/aom/test/external_frame_buffer_test.cc new file mode 100644 index 0000000000..8f16c4e2d5 --- /dev/null +++ b/third_party/aom/test/external_frame_buffer_test.cc @@ -0,0 +1,547 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <memory> +#include <string> +#include "common/tools_common.h" +#include "config/aom_config.h" +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/ivf_video_source.h" +#include "test/md5_helper.h" +#include "test/test_vectors.h" +#include "test/util.h" +#if CONFIG_WEBM_IO +#include "test/webm_video_source.h" +#endif + +namespace { + +const int kVideoNameParam = 1; + +struct ExternalFrameBuffer { + uint8_t *data; + size_t size; + int in_use; +}; + +// Class to manipulate a list of external frame buffers. 
+class ExternalFrameBufferList { + public: + ExternalFrameBufferList() + : num_buffers_(0), num_used_buffers_(0), ext_fb_list_(nullptr) {} + + virtual ~ExternalFrameBufferList() { + for (int i = 0; i < num_buffers_; ++i) { + delete[] ext_fb_list_[i].data; + } + delete[] ext_fb_list_; + } + + // Creates the list to hold the external buffers. Returns true on success. + bool CreateBufferList(int num_buffers) { + if (num_buffers < 0) return false; + + num_buffers_ = num_buffers; + ext_fb_list_ = new ExternalFrameBuffer[num_buffers_]; + if (ext_fb_list_ == nullptr) { + EXPECT_NE(ext_fb_list_, nullptr); + return false; + } + memset(ext_fb_list_, 0, sizeof(ext_fb_list_[0]) * num_buffers_); + return true; + } + + // Searches the frame buffer list for a free frame buffer. Makes sure + // that the frame buffer is at least |min_size| in bytes. Marks that the + // frame buffer is in use by libaom. Finally sets |fb| to point to the + // external frame buffer. Returns < 0 on an error. + int GetFreeFrameBuffer(size_t min_size, aom_codec_frame_buffer_t *fb) { + EXPECT_NE(fb, nullptr); + const int idx = FindFreeBufferIndex(); + if (idx == num_buffers_) return -1; + + if (ext_fb_list_[idx].size < min_size) { + delete[] ext_fb_list_[idx].data; + ext_fb_list_[idx].data = new uint8_t[min_size]; + if (ext_fb_list_[idx].data == nullptr) { + EXPECT_NE(ext_fb_list_[idx].data, nullptr); + } + memset(ext_fb_list_[idx].data, 0, min_size); + ext_fb_list_[idx].size = min_size; + } + + SetFrameBuffer(idx, fb); + + num_used_buffers_++; + return 0; + } + + // Test function that will not allocate any data for the frame buffer. + // Returns < 0 on an error. 
+ int GetZeroFrameBuffer(size_t min_size, aom_codec_frame_buffer_t *fb) { + EXPECT_NE(fb, nullptr); + const int idx = FindFreeBufferIndex(); + if (idx == num_buffers_) return -1; + + if (ext_fb_list_[idx].size < min_size) { + delete[] ext_fb_list_[idx].data; + ext_fb_list_[idx].data = nullptr; + ext_fb_list_[idx].size = min_size; + } + + SetFrameBuffer(idx, fb); + return 0; + } + + // Marks the external frame buffer that |fb| is pointing to as free. + // Returns < 0 on an error. + int ReturnFrameBuffer(aom_codec_frame_buffer_t *fb) { + if (fb == nullptr) { + EXPECT_NE(fb, nullptr); + return -1; + } + ExternalFrameBuffer *const ext_fb = + reinterpret_cast<ExternalFrameBuffer *>(fb->priv); + if (ext_fb == nullptr) { + EXPECT_NE(ext_fb, nullptr); + return -1; + } + EXPECT_EQ(1, ext_fb->in_use); + ext_fb->in_use = 0; + num_used_buffers_--; + return 0; + } + + // Checks that the aom_image_t data is contained within the external frame + // buffer private data passed back in the aom_image_t. + void CheckImageFrameBuffer(const aom_image_t *img) { + const struct ExternalFrameBuffer *const ext_fb = + reinterpret_cast<ExternalFrameBuffer *>(img->fb_priv); + + ASSERT_TRUE(img->planes[0] >= ext_fb->data && + img->planes[0] < (ext_fb->data + ext_fb->size)); + } + + int num_used_buffers() const { return num_used_buffers_; } + + private: + // Returns the index of the first free frame buffer. Returns |num_buffers_| + // if there are no free frame buffers. + int FindFreeBufferIndex() { + int i; + // Find a free frame buffer. + for (i = 0; i < num_buffers_; ++i) { + if (!ext_fb_list_[i].in_use) break; + } + return i; + } + + // Sets |fb| to an external frame buffer. idx is the index into the frame + // buffer list. 
+ void SetFrameBuffer(int idx, aom_codec_frame_buffer_t *fb) { + ASSERT_NE(fb, nullptr); + fb->data = ext_fb_list_[idx].data; + fb->size = ext_fb_list_[idx].size; + ASSERT_EQ(0, ext_fb_list_[idx].in_use); + ext_fb_list_[idx].in_use = 1; + fb->priv = &ext_fb_list_[idx]; + } + + int num_buffers_; + int num_used_buffers_; + ExternalFrameBuffer *ext_fb_list_; +}; + +#if CONFIG_WEBM_IO + +// Callback used by libaom to request the application to return a frame +// buffer of at least |min_size| in bytes. +int get_aom_frame_buffer(void *user_priv, size_t min_size, + aom_codec_frame_buffer_t *fb) { + ExternalFrameBufferList *const fb_list = + reinterpret_cast<ExternalFrameBufferList *>(user_priv); + return fb_list->GetFreeFrameBuffer(min_size, fb); +} + +// Callback used by libaom to tell the application that |fb| is not needed +// anymore. +int release_aom_frame_buffer(void *user_priv, aom_codec_frame_buffer_t *fb) { + ExternalFrameBufferList *const fb_list = + reinterpret_cast<ExternalFrameBufferList *>(user_priv); + return fb_list->ReturnFrameBuffer(fb); +} + +// Callback will not allocate data for frame buffer. +int get_aom_zero_frame_buffer(void *user_priv, size_t min_size, + aom_codec_frame_buffer_t *fb) { + ExternalFrameBufferList *const fb_list = + reinterpret_cast<ExternalFrameBufferList *>(user_priv); + return fb_list->GetZeroFrameBuffer(min_size, fb); +} + +// Callback will allocate one less byte than |min_size|. +int get_aom_one_less_byte_frame_buffer(void *user_priv, size_t min_size, + aom_codec_frame_buffer_t *fb) { + ExternalFrameBufferList *const fb_list = + reinterpret_cast<ExternalFrameBufferList *>(user_priv); + return fb_list->GetFreeFrameBuffer(min_size - 1, fb); +} + +// Callback will not release the external frame buffer. +int do_not_release_aom_frame_buffer(void *user_priv, + aom_codec_frame_buffer_t *fb) { + (void)user_priv; + (void)fb; + return 0; +} + +#endif // CONFIG_WEBM_IO + +// Class for testing passing in external frame buffers to libaom. 
+class ExternalFrameBufferMD5Test + : public ::libaom_test::DecoderTest, + public ::libaom_test::CodecTestWithParam<const char *> { + protected: + ExternalFrameBufferMD5Test() + : DecoderTest(GET_PARAM(::libaom_test::kCodecFactoryParam)), + md5_file_(nullptr), num_buffers_(0) {} + + ~ExternalFrameBufferMD5Test() override { + if (md5_file_ != nullptr) fclose(md5_file_); + } + + void PreDecodeFrameHook(const libaom_test::CompressedVideoSource &video, + libaom_test::Decoder *decoder) override { + if (num_buffers_ > 0 && video.frame_number() == 0) { + // Have libaom use frame buffers we create. + ASSERT_TRUE(fb_list_.CreateBufferList(num_buffers_)); + ASSERT_EQ(AOM_CODEC_OK, + decoder->SetFrameBufferFunctions(GetAV1FrameBuffer, + ReleaseAV1FrameBuffer, this)); + } + } + + void OpenMD5File(const std::string &md5_file_name_) { + md5_file_ = libaom_test::OpenTestDataFile(md5_file_name_); + ASSERT_NE(md5_file_, nullptr) + << "Md5 file open failed. Filename: " << md5_file_name_; + } + + void DecompressedFrameHook(const aom_image_t &img, + const unsigned int frame_number) override { + ASSERT_NE(md5_file_, nullptr); + char expected_md5[33]; + char junk[128]; + + // Read correct md5 checksums. + const int res = fscanf(md5_file_, "%s %s", expected_md5, junk); + ASSERT_NE(EOF, res) << "Read md5 data failed"; + expected_md5[32] = '\0'; + + ::libaom_test::MD5 md5_res; +#if FORCE_HIGHBITDEPTH_DECODING + const aom_img_fmt_t shifted_fmt = + (aom_img_fmt)(img.fmt & ~AOM_IMG_FMT_HIGHBITDEPTH); + if (img.bit_depth == 8 && shifted_fmt != img.fmt) { + aom_image_t *img_shifted = + aom_img_alloc(nullptr, shifted_fmt, img.d_w, img.d_h, 16); + img_shifted->bit_depth = img.bit_depth; + img_shifted->monochrome = img.monochrome; + aom_img_downshift(img_shifted, &img, 0); + md5_res.Add(img_shifted); + aom_img_free(img_shifted); + } else { +#endif + md5_res.Add(&img); +#if FORCE_HIGHBITDEPTH_DECODING + } +#endif + const char *const actual_md5 = md5_res.Get(); + + // Check md5 match. 
+ ASSERT_STREQ(expected_md5, actual_md5) + << "Md5 checksums don't match: frame number = " << frame_number; + + const struct ExternalFrameBuffer *const ext_fb = + reinterpret_cast<ExternalFrameBuffer *>(img.fb_priv); + + ASSERT_TRUE(img.planes[0] >= ext_fb->data && + img.planes[0] < (ext_fb->data + ext_fb->size)); + } + + // Callback to get a free external frame buffer. Return value < 0 is an + // error. + static int GetAV1FrameBuffer(void *user_priv, size_t min_size, + aom_codec_frame_buffer_t *fb) { + ExternalFrameBufferMD5Test *const md5Test = + reinterpret_cast<ExternalFrameBufferMD5Test *>(user_priv); + return md5Test->fb_list_.GetFreeFrameBuffer(min_size, fb); + } + + // Callback to release an external frame buffer. Return value < 0 is an + // error. + static int ReleaseAV1FrameBuffer(void *user_priv, + aom_codec_frame_buffer_t *fb) { + ExternalFrameBufferMD5Test *const md5Test = + reinterpret_cast<ExternalFrameBufferMD5Test *>(user_priv); + return md5Test->fb_list_.ReturnFrameBuffer(fb); + } + + void set_num_buffers(int num_buffers) { num_buffers_ = num_buffers; } + int num_buffers() const { return num_buffers_; } + + private: + FILE *md5_file_; + int num_buffers_; + ExternalFrameBufferList fb_list_; +}; + +#if CONFIG_WEBM_IO +const char kAV1TestFile[] = "av1-1-b8-03-sizeup.mkv"; +const char kAV1NonRefTestFile[] = "av1-1-b8-01-size-226x226.ivf"; + +// Class for testing passing in external frame buffers to libaom. 
+class ExternalFrameBufferTest : public ::testing::Test { + protected: + ExternalFrameBufferTest() + : video_(nullptr), decoder_(nullptr), num_buffers_(0) {} + + void SetUp() override { + video_ = new libaom_test::WebMVideoSource(kAV1TestFile); + ASSERT_NE(video_, nullptr); + video_->Init(); + video_->Begin(); + + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.allow_lowbitdepth = !FORCE_HIGHBITDEPTH_DECODING; + decoder_ = new libaom_test::AV1Decoder(cfg, 0); + ASSERT_NE(decoder_, nullptr); + } + + void TearDown() override { + delete decoder_; + decoder_ = nullptr; + delete video_; + video_ = nullptr; + } + + // Passes the external frame buffer information to libaom. + aom_codec_err_t SetFrameBufferFunctions( + int num_buffers, aom_get_frame_buffer_cb_fn_t cb_get, + aom_release_frame_buffer_cb_fn_t cb_release) { + if (num_buffers > 0) { + num_buffers_ = num_buffers; + EXPECT_TRUE(fb_list_.CreateBufferList(num_buffers_)); + } + + return decoder_->SetFrameBufferFunctions(cb_get, cb_release, &fb_list_); + } + + aom_codec_err_t DecodeOneFrame() { + const aom_codec_err_t res = + decoder_->DecodeFrame(video_->cxdata(), video_->frame_size()); + CheckDecodedFrames(); + if (res == AOM_CODEC_OK) video_->Next(); + return res; + } + + aom_codec_err_t DecodeRemainingFrames() { + for (; video_->cxdata() != nullptr; video_->Next()) { + const aom_codec_err_t res = + decoder_->DecodeFrame(video_->cxdata(), video_->frame_size()); + if (res != AOM_CODEC_OK) return res; + CheckDecodedFrames(); + } + return AOM_CODEC_OK; + } + + protected: + void CheckDecodedFrames() { + libaom_test::DxDataIterator dec_iter = decoder_->GetDxData(); + const aom_image_t *img = nullptr; + + // Get decompressed data + while ((img = dec_iter.Next()) != nullptr) { + fb_list_.CheckImageFrameBuffer(img); + } + } + + libaom_test::CompressedVideoSource *video_; + libaom_test::AV1Decoder *decoder_; + int num_buffers_; + ExternalFrameBufferList fb_list_; +}; + +class ExternalFrameBufferNonRefTest : public 
ExternalFrameBufferTest { + protected: + void SetUp() override { + video_ = new libaom_test::IVFVideoSource(kAV1NonRefTestFile); + ASSERT_NE(video_, nullptr); + video_->Init(); + video_->Begin(); + + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.allow_lowbitdepth = !FORCE_HIGHBITDEPTH_DECODING; + decoder_ = new libaom_test::AV1Decoder(cfg, 0); + ASSERT_NE(decoder_, nullptr); + } + + virtual void CheckFrameBufferRelease() { + TearDown(); + ASSERT_EQ(0, fb_list_.num_used_buffers()); + } +}; +#endif // CONFIG_WEBM_IO + +// This test runs through the set of test vectors, and decodes them. +// Libaom will call into the application to allocate a frame buffer when +// needed. The md5 checksums are computed for each frame in the video file. +// If md5 checksums match the correct md5 data, then the test is passed. +// Otherwise, the test failed. +TEST_P(ExternalFrameBufferMD5Test, ExtFBMD5Match) { + const std::string filename = GET_PARAM(kVideoNameParam); + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + + // Number of buffers equals #AOM_MAXIMUM_REF_BUFFERS + + // #AOM_MAXIMUM_WORK_BUFFERS + four jitter buffers. + const int jitter_buffers = 4; + const int num_buffers = + AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS + jitter_buffers; + set_num_buffers(num_buffers); + + // Open compressed video file. + std::unique_ptr<libaom_test::CompressedVideoSource> video; + if (filename.substr(filename.length() - 3, 3) == "ivf") { + video.reset(new libaom_test::IVFVideoSource(filename)); + } else { +#if CONFIG_WEBM_IO + video.reset(new libaom_test::WebMVideoSource(filename)); +#else + fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n", + filename.c_str()); + return; +#endif + } + ASSERT_NE(video, nullptr); + video->Init(); + + // Construct md5 file name. + const std::string md5_filename = filename + ".md5"; + OpenMD5File(md5_filename); + + // Set decode config. 
+ cfg.allow_lowbitdepth = !FORCE_HIGHBITDEPTH_DECODING; + set_cfg(cfg); + + // Decode frame, and check the md5 matching. + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get(), cfg)); +} + +#if CONFIG_WEBM_IO +TEST_F(ExternalFrameBufferTest, MinFrameBuffers) { + // Minimum number of external frame buffers for AV1 is + // #AOM_MAXIMUM_REF_BUFFERS + #AOM_MAXIMUM_WORK_BUFFERS. + const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS; + ASSERT_EQ(AOM_CODEC_OK, + SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer, + release_aom_frame_buffer)); + ASSERT_EQ(AOM_CODEC_OK, DecodeRemainingFrames()); +} + +TEST_F(ExternalFrameBufferTest, EightJitterBuffers) { + // Number of buffers equals #AOM_MAXIMUM_REF_BUFFERS + + // #AOM_MAXIMUM_WORK_BUFFERS + eight jitter buffers. + const int jitter_buffers = 8; + const int num_buffers = + AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS + jitter_buffers; + ASSERT_EQ(AOM_CODEC_OK, + SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer, + release_aom_frame_buffer)); + ASSERT_EQ(AOM_CODEC_OK, DecodeRemainingFrames()); +} + +TEST_F(ExternalFrameBufferTest, NotEnoughBuffers) { + // Minimum number of external frame buffers for AV1 is + // #AOM_MAXIMUM_REF_BUFFERS + #AOM_MAXIMUM_WORK_BUFFERS. Most files will + // only use 5 frame buffers at one time. + const int num_buffers = 2; + ASSERT_EQ(AOM_CODEC_OK, + SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer, + release_aom_frame_buffer)); + ASSERT_EQ(AOM_CODEC_OK, DecodeOneFrame()); + // Only run this on long clips. Decoding a very short clip will return + // AOM_CODEC_OK even with only 2 buffers. 
+ ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeRemainingFrames()); +} + +TEST_F(ExternalFrameBufferTest, NoRelease) { + const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS; + ASSERT_EQ(AOM_CODEC_OK, + SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer, + do_not_release_aom_frame_buffer)); + ASSERT_EQ(AOM_CODEC_OK, DecodeOneFrame()); + ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeRemainingFrames()); +} + +TEST_F(ExternalFrameBufferTest, NullRealloc) { + const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS; + ASSERT_EQ(AOM_CODEC_OK, + SetFrameBufferFunctions(num_buffers, get_aom_zero_frame_buffer, + release_aom_frame_buffer)); + ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeOneFrame()); +} + +TEST_F(ExternalFrameBufferTest, ReallocOneLessByte) { + const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS; + ASSERT_EQ(AOM_CODEC_OK, SetFrameBufferFunctions( + num_buffers, get_aom_one_less_byte_frame_buffer, + release_aom_frame_buffer)); + ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeOneFrame()); +} + +TEST_F(ExternalFrameBufferTest, NullGetFunction) { + const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS; + ASSERT_EQ( + AOM_CODEC_INVALID_PARAM, + SetFrameBufferFunctions(num_buffers, nullptr, release_aom_frame_buffer)); +} + +TEST_F(ExternalFrameBufferTest, NullReleaseFunction) { + const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS; + ASSERT_EQ( + AOM_CODEC_INVALID_PARAM, + SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer, nullptr)); +} + +TEST_F(ExternalFrameBufferTest, SetAfterDecode) { + const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS; + ASSERT_EQ(AOM_CODEC_OK, DecodeOneFrame()); + ASSERT_EQ(AOM_CODEC_ERROR, + SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer, + release_aom_frame_buffer)); +} + +TEST_F(ExternalFrameBufferNonRefTest, ReleaseNonRefFrameBuffer) { + const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS; + 
ASSERT_EQ(AOM_CODEC_OK, + SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer, + release_aom_frame_buffer)); + ASSERT_EQ(AOM_CODEC_OK, DecodeRemainingFrames()); + CheckFrameBufferRelease(); +} +#endif // CONFIG_WEBM_IO + +AV1_INSTANTIATE_TEST_SUITE( + ExternalFrameBufferMD5Test, + ::testing::ValuesIn(libaom_test::kAV1TestVectors, + libaom_test::kAV1TestVectors + + libaom_test::kNumAV1TestVectors)); +} // namespace diff --git a/third_party/aom/test/fdct4x4_test.cc b/third_party/aom/test/fdct4x4_test.cc new file mode 100644 index 0000000000..9cbf208adb --- /dev/null +++ b/third_party/aom/test/fdct4x4_test.cc @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2020, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <tuple> + +#include "aom_dsp/aom_dsp_common.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/av1_rtcd.h" +#include "config/aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" +#include "av1/common/entropy.h" +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_ports/mem.h" + +using libaom_test::ACMRandom; + +namespace { + +template <typename OutputType> +using FdctFunc = void (*)(const int16_t *in, OutputType *out, int stride); + +template <typename OutputType> +using FhtFunc = void (*)(const int16_t *in, OutputType *out, int stride, + TxfmParam *txfm_param); + +template <typename OutputType> +using Fdct4x4Param = + std::tuple<FdctFunc<OutputType>, FhtFunc<OutputType>, aom_bit_depth_t, int>; + +#if HAVE_NEON || HAVE_SSE2 +void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride, + TxfmParam * /*txfm_param*/) { + aom_fdct4x4_c(in, out, stride); +} + +void fdct4x4_lp_ref(const int16_t *in, int16_t *out, int stride, + TxfmParam * /*txfm_param*/) { + aom_fdct4x4_lp_c(in, out, stride); +} +#endif + +template <typename OutputType> +class Trans4x4FDCT : public libaom_test::TransformTestBase<OutputType>, + public ::testing::TestWithParam<Fdct4x4Param<OutputType>> { + public: + ~Trans4x4FDCT() override = default; + + using TxfmBaseOutType = libaom_test::TransformTestBase<OutputType>; + void SetUp() override { + fwd_txfm_ = std::get<0>(this->GetParam()); + TxfmBaseOutType::pitch_ = 4; + TxfmBaseOutType::height_ = 4; + TxfmBaseOutType::fwd_txfm_ref = std::get<1>(this->GetParam()); + TxfmBaseOutType::bit_depth_ = std::get<2>(this->GetParam()); + TxfmBaseOutType::mask_ = (1 << TxfmBaseOutType::bit_depth_) - 1; + TxfmBaseOutType::num_coeffs_ = std::get<3>(this->GetParam()); + } + + protected: + void RunFwdTxfm(const int16_t *in, 
OutputType *out, int stride) override { + fwd_txfm_(in, out, stride); + } + + void RunInvTxfm(const OutputType *out, uint8_t *dst, int stride) override { + (void)out; + (void)dst; + (void)stride; + } + + FdctFunc<OutputType> fwd_txfm_; +}; + +using Trans4x4FDCTTranLow = Trans4x4FDCT<tran_low_t>; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Trans4x4FDCTTranLow); +TEST_P(Trans4x4FDCTTranLow, CoeffCheck) { RunCoeffCheck(); } +TEST_P(Trans4x4FDCTTranLow, MemCheck) { RunMemCheck(); } + +using Trans4x4FDCTInt16 = Trans4x4FDCT<int16_t>; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Trans4x4FDCTInt16); +TEST_P(Trans4x4FDCTInt16, CoeffCheck) { RunCoeffCheck(); } +TEST_P(Trans4x4FDCTInt16, MemCheck) { RunMemCheck(); } + +using std::make_tuple; + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, Trans4x4FDCTTranLow, + ::testing::Values(make_tuple(&aom_fdct4x4_neon, + &fdct4x4_ref, AOM_BITS_8, + 16))); + +INSTANTIATE_TEST_SUITE_P(NEON, Trans4x4FDCTInt16, + ::testing::Values(make_tuple(&aom_fdct4x4_lp_neon, + &fdct4x4_lp_ref, + AOM_BITS_8, 16))); +#endif + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, Trans4x4FDCTTranLow, + ::testing::Values(make_tuple(&aom_fdct4x4_sse2, + &fdct4x4_ref, AOM_BITS_8, + 16))); + +INSTANTIATE_TEST_SUITE_P(SSE2, Trans4x4FDCTInt16, + ::testing::Values(make_tuple(&aom_fdct4x4_lp_sse2, + &fdct4x4_lp_ref, + AOM_BITS_8, 16))); +#endif +} // namespace diff --git a/third_party/aom/test/fft_test.cc b/third_party/aom/test/fft_test.cc new file mode 100644 index 0000000000..06a17a3f8f --- /dev/null +++ b/third_party/aom/test/fft_test.cc @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <math.h> + +#include <algorithm> +#include <complex> +#include <ostream> +#include <vector> + +#include "aom_dsp/fft_common.h" +#include "aom_mem/aom_mem.h" +#include "av1/common/common.h" +#include "config/aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +typedef void (*tform_fun_t)(const float *input, float *temp, float *output); + +// Simple 1D FFT implementation +template <typename InputType> +void fft(const InputType *data, std::complex<float> *result, int n) { + if (n == 1) { + result[0] = data[0]; + return; + } + std::vector<InputType> temp(n); + for (int k = 0; k < n / 2; ++k) { + temp[k] = data[2 * k]; + temp[n / 2 + k] = data[2 * k + 1]; + } + fft(&temp[0], result, n / 2); + fft(&temp[n / 2], result + n / 2, n / 2); + for (int k = 0; k < n / 2; ++k) { + std::complex<float> w = std::complex<float>((float)cos(2. * PI * k / n), + (float)-sin(2. 
* PI * k / n)); + std::complex<float> a = result[k]; + std::complex<float> b = result[n / 2 + k]; + result[k] = a + w * b; + result[n / 2 + k] = a - w * b; + } +} + +void transpose(std::vector<std::complex<float> > *data, int n) { + for (int y = 0; y < n; ++y) { + for (int x = y + 1; x < n; ++x) { + std::swap((*data)[y * n + x], (*data)[x * n + y]); + } + } +} + +// Simple 2D FFT implementation +template <class InputType> +std::vector<std::complex<float> > fft2d(const InputType *input, int n) { + std::vector<std::complex<float> > rowfft(n * n); + std::vector<std::complex<float> > result(n * n); + for (int y = 0; y < n; ++y) { + fft(input + y * n, &rowfft[y * n], n); + } + transpose(&rowfft, n); + for (int y = 0; y < n; ++y) { + fft(&rowfft[y * n], &result[y * n], n); + } + transpose(&result, n); + return result; +} + +struct FFTTestArg { + int n; + void (*fft)(const float *input, float *temp, float *output); + FFTTestArg(int n_in, tform_fun_t fft_in) : n(n_in), fft(fft_in) {} +}; + +std::ostream &operator<<(std::ostream &os, const FFTTestArg &test_arg) { + return os << "fft_arg { n:" << test_arg.n + << " fft:" << reinterpret_cast<const void *>(test_arg.fft) << " }"; +} + +class FFT2DTest : public ::testing::TestWithParam<FFTTestArg> { + protected: + void SetUp() override { + int n = GetParam().n; + input_ = (float *)aom_memalign(32, sizeof(*input_) * n * n); + temp_ = (float *)aom_memalign(32, sizeof(*temp_) * n * n); + output_ = (float *)aom_memalign(32, sizeof(*output_) * n * n * 2); + ASSERT_NE(input_, nullptr); + ASSERT_NE(temp_, nullptr); + ASSERT_NE(output_, nullptr); + memset(input_, 0, sizeof(*input_) * n * n); + memset(temp_, 0, sizeof(*temp_) * n * n); + memset(output_, 0, sizeof(*output_) * n * n * 2); + } + void TearDown() override { + aom_free(input_); + aom_free(temp_); + aom_free(output_); + } + float *input_; + float *temp_; + float *output_; +}; + +TEST_P(FFT2DTest, Correct) { + int n = GetParam().n; + for (int i = 0; i < n * n; ++i) { + input_[i] 
= 1; + std::vector<std::complex<float> > expected = fft2d<float>(&input_[0], n); + GetParam().fft(&input_[0], &temp_[0], &output_[0]); + for (int y = 0; y < n; ++y) { + for (int x = 0; x < (n / 2) + 1; ++x) { + EXPECT_NEAR(expected[y * n + x].real(), output_[2 * (y * n + x)], 1e-5); + EXPECT_NEAR(expected[y * n + x].imag(), output_[2 * (y * n + x) + 1], + 1e-5); + } + } + input_[i] = 0; + } +} + +TEST_P(FFT2DTest, Benchmark) { + int n = GetParam().n; + float sum = 0; + const int num_trials = 1000 * (64 - n); + for (int i = 0; i < num_trials; ++i) { + input_[i % (n * n)] = 1; + GetParam().fft(&input_[0], &temp_[0], &output_[0]); + sum += output_[0]; + input_[i % (n * n)] = 0; + } + EXPECT_NEAR(sum, num_trials, 1e-3); +} + +INSTANTIATE_TEST_SUITE_P(C, FFT2DTest, + ::testing::Values(FFTTestArg(2, aom_fft2x2_float_c), + FFTTestArg(4, aom_fft4x4_float_c), + FFTTestArg(8, aom_fft8x8_float_c), + FFTTestArg(16, aom_fft16x16_float_c), + FFTTestArg(32, + aom_fft32x32_float_c))); +#if AOM_ARCH_X86 || AOM_ARCH_X86_64 +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, FFT2DTest, + ::testing::Values(FFTTestArg(4, aom_fft4x4_float_sse2), + FFTTestArg(8, aom_fft8x8_float_sse2), + FFTTestArg(16, aom_fft16x16_float_sse2), + FFTTestArg(32, aom_fft32x32_float_sse2))); +#endif // HAVE_SSE2 +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, FFT2DTest, + ::testing::Values(FFTTestArg(8, aom_fft8x8_float_avx2), + FFTTestArg(16, aom_fft16x16_float_avx2), + FFTTestArg(32, aom_fft32x32_float_avx2))); +#endif // HAVE_AVX2 +#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64 + +struct IFFTTestArg { + int n; + tform_fun_t ifft; + IFFTTestArg(int n_in, tform_fun_t ifft_in) : n(n_in), ifft(ifft_in) {} +}; + +std::ostream &operator<<(std::ostream &os, const IFFTTestArg &test_arg) { + return os << "ifft_arg { n:" << test_arg.n + << " fft:" << reinterpret_cast<const void *>(test_arg.ifft) << " }"; +} + +class IFFT2DTest : public ::testing::TestWithParam<IFFTTestArg> { + protected: + void SetUp() override { + int 
n = GetParam().n; + input_ = (float *)aom_memalign(32, sizeof(*input_) * n * n * 2); + temp_ = (float *)aom_memalign(32, sizeof(*temp_) * n * n * 2); + output_ = (float *)aom_memalign(32, sizeof(*output_) * n * n); + ASSERT_NE(input_, nullptr); + ASSERT_NE(temp_, nullptr); + ASSERT_NE(output_, nullptr); + memset(input_, 0, sizeof(*input_) * n * n * 2); + memset(temp_, 0, sizeof(*temp_) * n * n * 2); + memset(output_, 0, sizeof(*output_) * n * n); + } + void TearDown() override { + aom_free(input_); + aom_free(temp_); + aom_free(output_); + } + float *input_; + float *temp_; + float *output_; +}; + +TEST_P(IFFT2DTest, Correctness) { + int n = GetParam().n; + ASSERT_GE(n, 2); + std::vector<float> expected(n * n); + std::vector<float> actual(n * n); + // Do forward transform then invert to make sure we get back expected + for (int y = 0; y < n; ++y) { + for (int x = 0; x < n; ++x) { + expected[y * n + x] = 1; + std::vector<std::complex<float> > input_c = fft2d(&expected[0], n); + for (int i = 0; i < n * n; ++i) { + input_[2 * i + 0] = input_c[i].real(); + input_[2 * i + 1] = input_c[i].imag(); + } + GetParam().ifft(&input_[0], &temp_[0], &output_[0]); + + for (int yy = 0; yy < n; ++yy) { + for (int xx = 0; xx < n; ++xx) { + EXPECT_NEAR(expected[yy * n + xx], output_[yy * n + xx] / (n * n), + 1e-5); + } + } + expected[y * n + x] = 0; + } + } +} + +TEST_P(IFFT2DTest, Benchmark) { + int n = GetParam().n; + float sum = 0; + const int num_trials = 1000 * (64 - n); + for (int i = 0; i < num_trials; ++i) { + input_[i % (n * n)] = 1; + GetParam().ifft(&input_[0], &temp_[0], &output_[0]); + sum += output_[0]; + input_[i % (n * n)] = 0; + } + EXPECT_GE(sum, num_trials / 2); +} +INSTANTIATE_TEST_SUITE_P( + C, IFFT2DTest, + ::testing::Values(IFFTTestArg(2, aom_ifft2x2_float_c), + IFFTTestArg(4, aom_ifft4x4_float_c), + IFFTTestArg(8, aom_ifft8x8_float_c), + IFFTTestArg(16, aom_ifft16x16_float_c), + IFFTTestArg(32, aom_ifft32x32_float_c))); +#if AOM_ARCH_X86 || AOM_ARCH_X86_64 +#if 
HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, IFFT2DTest, + ::testing::Values(IFFTTestArg(4, aom_ifft4x4_float_sse2), + IFFTTestArg(8, aom_ifft8x8_float_sse2), + IFFTTestArg(16, aom_ifft16x16_float_sse2), + IFFTTestArg(32, aom_ifft32x32_float_sse2))); +#endif // HAVE_SSE2 + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, IFFT2DTest, + ::testing::Values(IFFTTestArg(8, aom_ifft8x8_float_avx2), + IFFTTestArg(16, aom_ifft16x16_float_avx2), + IFFTTestArg(32, aom_ifft32x32_float_avx2))); +#endif // HAVE_AVX2 +#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64 + +} // namespace diff --git a/third_party/aom/test/film_grain_table_test.cc b/third_party/aom/test/film_grain_table_test.cc new file mode 100644 index 0000000000..808d966feb --- /dev/null +++ b/third_party/aom/test/film_grain_table_test.cc @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <string> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "aom_dsp/grain_table.h" +#include "aom/internal/aom_codec_internal.h" +#include "av1/encoder/grain_test_vectors.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/video_source.h" + +void grain_equal(const aom_film_grain_t *expected, + const aom_film_grain_t *actual) { + EXPECT_EQ(expected->apply_grain, actual->apply_grain); + EXPECT_EQ(expected->update_parameters, actual->update_parameters); + if (!expected->update_parameters) return; + EXPECT_EQ(expected->num_y_points, actual->num_y_points); + EXPECT_EQ(expected->num_cb_points, actual->num_cb_points); + EXPECT_EQ(expected->num_cr_points, actual->num_cr_points); + EXPECT_EQ(0, memcmp(expected->scaling_points_y, actual->scaling_points_y, + expected->num_y_points * + sizeof(expected->scaling_points_y[0]))); + EXPECT_EQ(0, memcmp(expected->scaling_points_cb, actual->scaling_points_cb, + expected->num_cb_points * + sizeof(expected->scaling_points_cb[0]))); + EXPECT_EQ(0, memcmp(expected->scaling_points_cr, actual->scaling_points_cr, + expected->num_cr_points * + sizeof(expected->scaling_points_cr[0]))); + EXPECT_EQ(expected->scaling_shift, actual->scaling_shift); + EXPECT_EQ(expected->ar_coeff_lag, actual->ar_coeff_lag); + EXPECT_EQ(expected->ar_coeff_shift, actual->ar_coeff_shift); + + const int num_pos_luma = + 2 * expected->ar_coeff_lag * (expected->ar_coeff_lag + 1); + const int num_pos_chroma = num_pos_luma; + EXPECT_EQ(0, memcmp(expected->ar_coeffs_y, actual->ar_coeffs_y, + sizeof(expected->ar_coeffs_y[0]) * num_pos_luma)); + if (actual->num_cb_points || actual->chroma_scaling_from_luma) { + EXPECT_EQ(0, memcmp(expected->ar_coeffs_cb, actual->ar_coeffs_cb, + sizeof(expected->ar_coeffs_cb[0]) * num_pos_chroma)); + } + if (actual->num_cr_points || actual->chroma_scaling_from_luma) { + EXPECT_EQ(0, 
memcmp(expected->ar_coeffs_cr, actual->ar_coeffs_cr, + sizeof(expected->ar_coeffs_cr[0]) * num_pos_chroma)); + } + EXPECT_EQ(expected->overlap_flag, actual->overlap_flag); + EXPECT_EQ(expected->chroma_scaling_from_luma, + actual->chroma_scaling_from_luma); + EXPECT_EQ(expected->grain_scale_shift, actual->grain_scale_shift); + // EXPECT_EQ(expected->random_seed, actual->random_seed); + + // clip_to_restricted and bit_depth aren't written + if (expected->num_cb_points) { + EXPECT_EQ(expected->cb_mult, actual->cb_mult); + EXPECT_EQ(expected->cb_luma_mult, actual->cb_luma_mult); + EXPECT_EQ(expected->cb_offset, actual->cb_offset); + } + if (expected->num_cr_points) { + EXPECT_EQ(expected->cr_mult, actual->cr_mult); + EXPECT_EQ(expected->cr_luma_mult, actual->cr_luma_mult); + EXPECT_EQ(expected->cr_offset, actual->cr_offset); + } +} + +// Appends a single segment, checks lookups inside and outside its time range, +// then extends the segment and looks it up again with removal. +TEST(FilmGrainTableTest, AddAndLookupSingleSegment) { + aom_film_grain_table_t table; + memset(&table, 0, sizeof(table)); + + aom_film_grain_t grain; + EXPECT_FALSE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain)); + + aom_film_grain_table_append(&table, 1000, 2000, film_grain_test_vectors + 0); + EXPECT_FALSE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain)); + EXPECT_FALSE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain)); + + EXPECT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain)); + + grain.bit_depth = film_grain_test_vectors[0].bit_depth; + // NOTE(review): the memcmp length below is sizeof(table), i.e. the size of + // aom_film_grain_table_t, while the objects compared are aom_film_grain_t. + // Confirm this is intended before changing it; grain_equal() above + // deliberately skips random_seed, so a full-size memcmp may not be valid. + EXPECT_EQ(0, memcmp(&grain, film_grain_test_vectors + 0, sizeof(table))); + + // Extend the existing segment + aom_film_grain_table_append(&table, 2000, 3000, film_grain_test_vectors + 0); + EXPECT_EQ(nullptr, table.head->next); + + // Lookup and remove and check that the entry is no longer there + EXPECT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, true, &grain)); + EXPECT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain)); + + EXPECT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, true, &grain)); + 
EXPECT_FALSE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain)); + + EXPECT_EQ(nullptr, table.head); + EXPECT_EQ(nullptr, table.tail); + aom_film_grain_table_free(&table); +} + +TEST(FilmGrainTableTest, AddSingleSegmentRemoveBiggerSegment) { + aom_film_grain_table_t table; + aom_film_grain_t grain; + + memset(&table, 0, sizeof(table)); + + aom_film_grain_table_append(&table, 0, 1000, film_grain_test_vectors + 0); + EXPECT_TRUE(aom_film_grain_table_lookup(&table, 0, 1100, true, &grain)); + + EXPECT_EQ(nullptr, table.head); + EXPECT_EQ(nullptr, table.tail); + aom_film_grain_table_free(&table); +} + +TEST(FilmGrainTableTest, SplitSingleSegment) { + aom_film_grain_table_t table; + aom_film_grain_t grain; + memset(&table, 0, sizeof(table)); + + aom_film_grain_table_append(&table, 0, 1000, film_grain_test_vectors + 0); + + // Test lookup and remove that adjusts start time + EXPECT_TRUE(aom_film_grain_table_lookup(&table, 0, 100, true, &grain)); + EXPECT_EQ(nullptr, table.head->next); + EXPECT_EQ(100, table.head->start_time); + + // Test lookup and remove that adjusts end time + EXPECT_TRUE(aom_film_grain_table_lookup(&table, 900, 1000, true, &grain)); + EXPECT_EQ(nullptr, table.head->next); + EXPECT_EQ(100, table.head->start_time); + EXPECT_EQ(900, table.head->end_time); + + // Test lookup and remove that splits the first entry + EXPECT_TRUE(aom_film_grain_table_lookup(&table, 400, 600, true, &grain)); + EXPECT_EQ(100, table.head->start_time); + EXPECT_EQ(400, table.head->end_time); + + ASSERT_NE(nullptr, table.head->next); + EXPECT_EQ(table.tail, table.head->next); + EXPECT_EQ(600, table.head->next->start_time); + EXPECT_EQ(900, table.head->next->end_time); + + aom_film_grain_table_free(&table); +} + +TEST(FilmGrainTableTest, AddAndLookupMultipleSegments) { + aom_film_grain_table_t table; + memset(&table, 0, sizeof(table)); + + aom_film_grain_t grain; + const int kNumTestVectors = + sizeof(film_grain_test_vectors) / sizeof(film_grain_test_vectors[0]); + for 
(int i = 0; i < kNumTestVectors; ++i) { + aom_film_grain_table_append(&table, i * 1000, (i + 1) * 1000, + film_grain_test_vectors + i); + } + + for (int i = kNumTestVectors - 1; i >= 0; --i) { + EXPECT_TRUE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000, + true, &grain)); + grain_equal(film_grain_test_vectors + i, &grain); + EXPECT_FALSE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000, + true, &grain)); + } + + // Verify that all the data has been removed + for (int i = 0; i < kNumTestVectors; ++i) { + EXPECT_FALSE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000, + true, &grain)); + } + aom_film_grain_table_free(&table); +} + +class FilmGrainTableIOTest : public ::testing::Test { + protected: + void SetUp() override { memset(&error_, 0, sizeof(error_)); } + struct aom_internal_error_info error_; +}; + +TEST_F(FilmGrainTableIOTest, ReadMissingFile) { + aom_film_grain_table_t table; + memset(&table, 0, sizeof(table)); + ASSERT_EQ(AOM_CODEC_ERROR, aom_film_grain_table_read( + &table, "/path/to/missing/file", &error_)); +} + +TEST_F(FilmGrainTableIOTest, ReadTruncatedFile) { + aom_film_grain_table_t table; + memset(&table, 0, sizeof(table)); + + std::string grain_file; + FILE *file = libaom_test::GetTempOutFile(&grain_file); + ASSERT_NE(file, nullptr); + fwrite("deadbeef", 8, 1, file); + fclose(file); + ASSERT_EQ(AOM_CODEC_ERROR, + aom_film_grain_table_read(&table, grain_file.c_str(), &error_)); + EXPECT_EQ(0, remove(grain_file.c_str())); +} + +TEST_F(FilmGrainTableIOTest, RoundTripReadWrite) { + aom_film_grain_table_t table; + memset(&table, 0, sizeof(table)); + + aom_film_grain_t expected_grain[16]; + const int kNumTestVectors = + sizeof(film_grain_test_vectors) / sizeof(film_grain_test_vectors[0]); + for (int i = 0; i < kNumTestVectors; ++i) { + expected_grain[i] = film_grain_test_vectors[i]; + expected_grain[i].random_seed = i; + expected_grain[i].update_parameters = i % 2; + expected_grain[i].apply_grain = (i + 1) % 2; + 
expected_grain[i].bit_depth = 0; + aom_film_grain_table_append(&table, i * 1000, (i + 1) * 1000, + expected_grain + i); + } + std::string grain_file; + FILE *tmpfile = libaom_test::GetTempOutFile(&grain_file); + ASSERT_NE(tmpfile, nullptr); + fclose(tmpfile); + ASSERT_EQ(AOM_CODEC_OK, + aom_film_grain_table_write(&table, grain_file.c_str(), &error_)); + aom_film_grain_table_free(&table); + + memset(&table, 0, sizeof(table)); + ASSERT_EQ(AOM_CODEC_OK, + aom_film_grain_table_read(&table, grain_file.c_str(), &error_)); + for (int i = 0; i < kNumTestVectors; ++i) { + aom_film_grain_t grain; + EXPECT_TRUE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000, + true, &grain)); + grain_equal(expected_grain + i, &grain); + } + aom_film_grain_table_free(&table); + EXPECT_EQ(0, remove(grain_file.c_str())); +} + +TEST_F(FilmGrainTableIOTest, RoundTripSplit) { + std::string grain_file; + FILE *tmpfile = libaom_test::GetTempOutFile(&grain_file); + ASSERT_NE(tmpfile, nullptr); + fclose(tmpfile); + + aom_film_grain_table_t table; + memset(&table, 0, sizeof(table)); + + aom_film_grain_t grain = film_grain_test_vectors[0]; + aom_film_grain_table_append(&table, 0, 3000, &grain); + ASSERT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, true, &grain)); + ASSERT_TRUE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain)); + EXPECT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain)); + ASSERT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain)); + ASSERT_EQ(AOM_CODEC_OK, + aom_film_grain_table_write(&table, grain_file.c_str(), &error_)); + aom_film_grain_table_free(&table); + + memset(&table, 0, sizeof(table)); + ASSERT_EQ(AOM_CODEC_OK, + aom_film_grain_table_read(&table, grain_file.c_str(), &error_)); + ASSERT_TRUE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain)); + ASSERT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain)); + ASSERT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain)); + 
aom_film_grain_table_free(&table); + + EXPECT_EQ(0, remove(grain_file.c_str())); +} + +const ::libaom_test::TestMode kFilmGrainEncodeTestModes[] = { + ::libaom_test::kRealTime, +#if !CONFIG_REALTIME_ONLY + ::libaom_test::kOnePassGood +#endif +}; + +class FilmGrainEncodeTest + : public ::libaom_test::CodecTestWith3Params<int, int, + ::libaom_test::TestMode>, + public ::libaom_test::EncoderTest { + protected: + FilmGrainEncodeTest() + : EncoderTest(GET_PARAM(0)), test_monochrome_(GET_PARAM(1)), + key_frame_dist_(GET_PARAM(2)), test_mode_(GET_PARAM(3)) {} + ~FilmGrainEncodeTest() override = default; + + void SetUp() override { + InitializeConfig(test_mode_); + cfg_.monochrome = test_monochrome_ == 1; + cfg_.rc_target_bitrate = 300; + cfg_.kf_max_dist = key_frame_dist_; + cfg_.g_lag_in_frames = 0; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, + test_mode_ == ::libaom_test::kRealTime ? 7 : 5); + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_FILM); + encoder->Control(AV1E_SET_DENOISE_NOISE_LEVEL, 1); + } else if (video->frame() == 1) { + cfg_.monochrome = (test_monochrome_ == 1 || test_monochrome_ == 2); + encoder->Config(&cfg_); + } else { + cfg_.monochrome = test_monochrome_ == 1; + encoder->Config(&cfg_); + } + } + + bool DoDecode() const override { return false; } + + void DoTest() { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 3); + cfg_.g_w = video.img()->d_w; + cfg_.g_h = video.img()->d_h; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + private: + // Values of test_monochrome_: + // 0: monochrome always off. + // 1: monochrome always on. + // 2: monochrome changes from 0, 1, 0, for encoded frames 0, 1, 2. + // The case where monochrome changes from 1 to 0 (i.e., encoder initialized + // with monochrome = 1 and then subsequently encoded with monochrome = 0) + // will fail. 
The test InitMonochrome1_EncodeMonochrome0 below verifies this. + int test_monochrome_; + int key_frame_dist_; + ::libaom_test::TestMode test_mode_; +}; + +TEST_P(FilmGrainEncodeTest, Test) { DoTest(); } + +AV1_INSTANTIATE_TEST_SUITE(FilmGrainEncodeTest, ::testing::Range(0, 3), + ::testing::Values(0, 10), + ::testing::ValuesIn(kFilmGrainEncodeTestModes)); + +// Initialize encoder with monochrome = 1, and then encode frame with +// monochrome = 0. This will result in an error: see the check +// in encoder_set_config() in av1/av1_cx_iface.c. +// TODO(marpan): Consider moving this test to another file, as the failure +// has nothing to do with film grain mode. +TEST(FilmGrainEncodeTest, InitMonochrome1EncodeMonochrome0) { + const int kWidth = 352; + const int kHeight = 288; + const int usage = AOM_USAGE_REALTIME; + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage), AOM_CODEC_OK); + aom_codec_ctx_t enc; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + // Initialize encoder, with monochrome = 1. + cfg.monochrome = 1; + aom_codec_err_t init_status = aom_codec_enc_init(&enc, iface, &cfg, 0); + ASSERT_EQ(init_status, AOM_CODEC_OK); + ASSERT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 7), AOM_CODEC_OK); + ASSERT_EQ(aom_codec_control(&enc, AV1E_SET_TUNE_CONTENT, AOM_CONTENT_FILM), + AOM_CODEC_OK); + ASSERT_EQ(aom_codec_control(&enc, AV1E_SET_DENOISE_NOISE_LEVEL, 1), + AOM_CODEC_OK); + // Set image with zero values. + constexpr size_t kBufferSize = + kWidth * kHeight + 2 * (kWidth + 1) / 2 * (kHeight + 1) / 2; + std::vector<unsigned char> buffer(kBufferSize); + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1, + buffer.data())); + // Encode first frame. + ASSERT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK); + // Second frame: update config with monochrome = 0. 
+ cfg.monochrome = 0; + ASSERT_EQ(aom_codec_enc_config_set(&enc, &cfg), AOM_CODEC_INVALID_PARAM); + ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} diff --git a/third_party/aom/test/filterintra_test.cc b/third_party/aom/test/filterintra_test.cc new file mode 100644 index 0000000000..0a0ab11dc3 --- /dev/null +++ b/third_party/aom/test/filterintra_test.cc @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/av1_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/enums.h" + +namespace { + +using libaom_test::ACMRandom; +using std::tuple; + +typedef void (*Predictor)(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, + const uint8_t *above, const uint8_t *left, int mode); + +// Note: +// Test parameter list: +// Reference predictor, optimized predictor, prediction mode, tx size +// +typedef tuple<Predictor, Predictor, int> PredFuncMode; +typedef tuple<PredFuncMode, TX_SIZE> PredParams; + +const int MaxTxSize = 32; + +const int MaxTestNum = 100; + +class AV1FilterIntraPredTest : public ::testing::TestWithParam<PredParams> { + public: + ~AV1FilterIntraPredTest() override = default; + void SetUp() override { + PredFuncMode funcMode = GET_PARAM(0); + predFuncRef_ = std::get<0>(funcMode); + predFunc_ = std::get<1>(funcMode); + mode_ = std::get<2>(funcMode); + txSize_ = GET_PARAM(1); + + 
alloc_ = new uint8_t[2 * MaxTxSize + 1]; + predRef_ = new uint8_t[MaxTxSize * MaxTxSize]; + pred_ = new uint8_t[MaxTxSize * MaxTxSize]; + ASSERT_NE(alloc_, nullptr); + ASSERT_NE(predRef_, nullptr); + ASSERT_NE(pred_, nullptr); + } + + void TearDown() override { + delete[] alloc_; + delete[] predRef_; + delete[] pred_; + } + + protected: + void RunTest() const { + int tstIndex = 0; + int stride = tx_size_wide[txSize_]; + uint8_t *left = alloc_; + uint8_t *above = alloc_ + MaxTxSize; + while (tstIndex < MaxTestNum) { + PrepareBuffer(); + predFuncRef_(predRef_, stride, txSize_, &above[1], left, mode_); + API_REGISTER_STATE_CHECK( + predFunc_(pred_, stride, txSize_, &above[1], left, mode_)); + DiffPred(tstIndex); + tstIndex += 1; + } + } + void RunSpeedTest() const { + int stride = tx_size_wide[txSize_]; + uint8_t *left = alloc_; + uint8_t *above = alloc_ + MaxTxSize; + const int numIter = 5000; + + PrepareBuffer(); + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (int i = 0; i < numIter; i++) { + predFuncRef_(predRef_, stride, txSize_, &above[1], left, mode_); + } + aom_usec_timer_mark(&ref_timer); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < numIter; i++) { + predFunc_(pred_, stride, txSize_, &above[1], left, mode_); + } + aom_usec_timer_mark(&timer); + + const int ref_sum_time = + static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); + const int sum_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + + printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \t mode = %d \n", + ref_sum_time, sum_time, + (static_cast<float>(ref_sum_time) / static_cast<float>(sum_time)), + static_cast<int>(mode_)); + + DiffPred(0); + } + + private: + void PrepareBuffer() const { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int i = 0; + while (i < (2 * MaxTxSize + 1)) { + alloc_[i] = rnd.Rand8(); + i++; + } + } + + void DiffPred(int testNum) const { + int i = 0; + while (i < tx_size_wide[txSize_] * tx_size_high[txSize_]) 
{ + EXPECT_EQ(predRef_[i], pred_[i]) << "Error at position: " << i << " " + << "Tx size: " << tx_size_wide[txSize_] + << "x" << tx_size_high[txSize_] << " " + << "Test number: " << testNum; + i++; + } + } + + Predictor predFunc_; + Predictor predFuncRef_; + int mode_; + TX_SIZE txSize_; + uint8_t *alloc_; + uint8_t *pred_; + uint8_t *predRef_; +}; + +TEST_P(AV1FilterIntraPredTest, BitExactCheck) { RunTest(); } + +TEST_P(AV1FilterIntraPredTest, DISABLED_Speed) { RunSpeedTest(); } + +using ::testing::make_tuple; +#if HAVE_SSE4_1 +const PredFuncMode kPredFuncMdArray[] = { + make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1, + FILTER_DC_PRED), + make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1, + FILTER_V_PRED), + make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1, + FILTER_H_PRED), + make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1, + FILTER_D157_PRED), + make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1, + FILTER_PAETH_PRED), +}; + +const TX_SIZE kTxSize[] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_4X8, + TX_8X4, TX_8X16, TX_16X8, TX_16X32, TX_32X16, + TX_4X16, TX_16X4, TX_8X32, TX_32X8 }; + +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1FilterIntraPredTest, + ::testing::Combine(::testing::ValuesIn(kPredFuncMdArray), + ::testing::ValuesIn(kTxSize))); +#endif // HAVE_SSE4_1 + +#if HAVE_NEON +const PredFuncMode kPredFuncMdArrayNEON[] = { + make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_neon, + FILTER_DC_PRED), + make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_neon, + FILTER_V_PRED), + make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_neon, + FILTER_H_PRED), + make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_neon, + FILTER_D157_PRED), + make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_neon, + FILTER_PAETH_PRED), +}; + +const TX_SIZE 
kTxSizeNEON[] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_4X8, + TX_8X4, TX_8X16, TX_16X8, TX_16X32, TX_32X16, + TX_4X16, TX_16X4, TX_8X32, TX_32X8 }; + +INSTANTIATE_TEST_SUITE_P( + NEON, AV1FilterIntraPredTest, + ::testing::Combine(::testing::ValuesIn(kPredFuncMdArrayNEON), + ::testing::ValuesIn(kTxSizeNEON))); +#endif // HAVE_NEON + +} // namespace diff --git a/third_party/aom/test/firstpass_test.cc b/third_party/aom/test/firstpass_test.cc new file mode 100644 index 0000000000..1f4f3b7853 --- /dev/null +++ b/third_party/aom/test/firstpass_test.cc @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <stddef.h> + +#include "av1/common/common.h" +#include "av1/encoder/firstpass.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +TEST(FirstpassTest, FirstpassInfoInitWithExtBuf) { + FIRSTPASS_INFO firstpass_info; + FIRSTPASS_STATS ext_stats_buf[10]; + const int ref_stats_size = 10; + for (int i = 0; i < ref_stats_size; ++i) { + av1_zero(ext_stats_buf[i]); + ext_stats_buf[i].frame = i; + } + aom_codec_err_t ret = + av1_firstpass_info_init(&firstpass_info, ext_stats_buf, 10); + EXPECT_EQ(firstpass_info.stats_count, ref_stats_size); + EXPECT_EQ(firstpass_info.future_stats_count + firstpass_info.past_stats_count, + firstpass_info.stats_count); + EXPECT_EQ(firstpass_info.cur_index, 0); + EXPECT_EQ(ret, AOM_CODEC_OK); +} + +TEST(FirstpassTest, FirstpassInfoInitWithStaticBuf) { + FIRSTPASS_INFO firstpass_info; + aom_codec_err_t ret = av1_firstpass_info_init(&firstpass_info, nullptr, 0); + EXPECT_EQ(firstpass_info.stats_count, 0); + EXPECT_EQ(firstpass_info.cur_index, 0); + EXPECT_EQ(ret, AOM_CODEC_OK); +} + +TEST(FirstpassTest, FirstpassInfoPushPop) { + FIRSTPASS_INFO firstpass_info; + av1_firstpass_info_init(&firstpass_info, nullptr, 0); + EXPECT_EQ(firstpass_info.stats_buf_size, FIRSTPASS_INFO_STATIC_BUF_SIZE); + for (int i = 0; i < FIRSTPASS_INFO_STATIC_BUF_SIZE; ++i) { + FIRSTPASS_STATS stats; + av1_zero(stats); + stats.frame = i; + aom_codec_err_t ret = av1_firstpass_info_push(&firstpass_info, &stats); + EXPECT_EQ(ret, AOM_CODEC_OK); + } + EXPECT_EQ(firstpass_info.stats_count, FIRSTPASS_INFO_STATIC_BUF_SIZE); + const int pop_count = FIRSTPASS_INFO_STATIC_BUF_SIZE / 2; + for (int i = 0; i < pop_count; ++i) { + const FIRSTPASS_STATS *stats = av1_firstpass_info_peek(&firstpass_info, 0); + aom_codec_err_t ret = + av1_firstpass_info_move_cur_index_and_pop(&firstpass_info); + EXPECT_NE(stats, nullptr); + EXPECT_EQ(stats->frame, i); + EXPECT_EQ(ret, AOM_CODEC_OK); + } + EXPECT_EQ(firstpass_info.stats_count, + 
FIRSTPASS_INFO_STATIC_BUF_SIZE - pop_count); + + const int push_count = FIRSTPASS_INFO_STATIC_BUF_SIZE / 2; + for (int i = 0; i < push_count; ++i) { + FIRSTPASS_STATS stats; + av1_zero(stats); + aom_codec_err_t ret = av1_firstpass_info_push(&firstpass_info, &stats); + EXPECT_EQ(ret, AOM_CODEC_OK); + } + EXPECT_EQ(firstpass_info.stats_count, FIRSTPASS_INFO_STATIC_BUF_SIZE); + + EXPECT_EQ(firstpass_info.stats_count, firstpass_info.stats_buf_size); + { + // Push the stats when the queue is full. + FIRSTPASS_STATS stats; + av1_zero(stats); + aom_codec_err_t ret = av1_firstpass_info_push(&firstpass_info, &stats); + EXPECT_EQ(ret, AOM_CODEC_ERROR); + } +} + +TEST(FirstpassTest, FirstpassInfoTotalStats) { + FIRSTPASS_INFO firstpass_info; + av1_firstpass_info_init(&firstpass_info, nullptr, 0); + EXPECT_EQ(firstpass_info.total_stats.frame, 0); + for (int i = 0; i < 10; ++i) { + FIRSTPASS_STATS stats; + av1_zero(stats); + stats.count = 1; + av1_firstpass_info_push(&firstpass_info, &stats); + } + EXPECT_EQ(firstpass_info.total_stats.count, 10); +} + +TEST(FirstpassTest, FirstpassInfoMoveCurr) { + FIRSTPASS_INFO firstpass_info; + av1_firstpass_info_init(&firstpass_info, nullptr, 0); + int frame_cnt = 0; + EXPECT_EQ(firstpass_info.stats_buf_size, FIRSTPASS_INFO_STATIC_BUF_SIZE); + for (int i = 0; i < FIRSTPASS_INFO_STATIC_BUF_SIZE; ++i) { + FIRSTPASS_STATS stats; + av1_zero(stats); + stats.frame = frame_cnt; + ++frame_cnt; + aom_codec_err_t ret = av1_firstpass_info_push(&firstpass_info, &stats); + EXPECT_EQ(ret, AOM_CODEC_OK); + } + EXPECT_EQ(firstpass_info.cur_index, firstpass_info.start_index); + { + aom_codec_err_t ret = av1_firstpass_info_pop(&firstpass_info); + // We cannot pop when cur_index == start_index + EXPECT_EQ(ret, AOM_CODEC_ERROR); + } + int ref_frame_cnt = 0; + const int move_count = FIRSTPASS_INFO_STATIC_BUF_SIZE * 2 / 3; + for (int i = 0; i < move_count; ++i) { + const FIRSTPASS_STATS *this_stats = + av1_firstpass_info_peek(&firstpass_info, 0); + 
EXPECT_EQ(this_stats->frame, ref_frame_cnt); + ++ref_frame_cnt; + av1_firstpass_info_move_cur_index(&firstpass_info); + } + EXPECT_EQ(firstpass_info.future_stats_count, + FIRSTPASS_INFO_STATIC_BUF_SIZE - move_count); + EXPECT_EQ(firstpass_info.past_stats_count, move_count); + EXPECT_EQ(firstpass_info.stats_count, FIRSTPASS_INFO_STATIC_BUF_SIZE); + + const int test_count = FIRSTPASS_INFO_STATIC_BUF_SIZE / 2; + for (int i = 0; i < test_count; ++i) { + aom_codec_err_t ret = av1_firstpass_info_pop(&firstpass_info); + EXPECT_EQ(ret, AOM_CODEC_OK); + } + + // Pop #test_count stats + for (int i = 0; i < test_count; ++i) { + FIRSTPASS_STATS stats; + av1_zero(stats); + stats.frame = frame_cnt; + ++frame_cnt; + aom_codec_err_t ret = av1_firstpass_info_push(&firstpass_info, &stats); + EXPECT_EQ(ret, AOM_CODEC_OK); + } + + // peek and move #test_count stats + for (int i = 0; i < test_count; ++i) { + const FIRSTPASS_STATS *this_stats = + av1_firstpass_info_peek(&firstpass_info, 0); + EXPECT_EQ(this_stats->frame, ref_frame_cnt); + ++ref_frame_cnt; + av1_firstpass_info_move_cur_index(&firstpass_info); + } + + // pop #test_count stats + for (int i = 0; i < test_count; ++i) { + aom_codec_err_t ret = av1_firstpass_info_pop(&firstpass_info); + EXPECT_EQ(ret, AOM_CODEC_OK); + } +} + +} // namespace diff --git a/third_party/aom/test/force_key_frame_test.cc b/third_party/aom/test/force_key_frame_test.cc new file mode 100644 index 0000000000..2b85d26530 --- /dev/null +++ b/third_party/aom/test/force_key_frame_test.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2022, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Tests for https://crbug.com/aomedia/3327. +// +// In good-quality mode, set cfg.g_lag_in_frames to 1 or 0 and encode two +// frames in one-pass mode. Pass AOM_EFLAG_FORCE_KF to the second +// aom_codec_encode() call. Both frames should be encoded as key frames. + +#include <memory> + +#include "aom/aomcx.h" +#include "aom/aom_encoder.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +void TestOnePassMode(unsigned int lag_in_frames) { + // A buffer of gray samples of size 128x128, YUV 4:2:0. + constexpr size_t kImageDataSize = 128 * 128 + 2 * 64 * 64; + std::unique_ptr<unsigned char[]> img_data(new unsigned char[kImageDataSize]); + ASSERT_NE(img_data, nullptr); + memset(img_data.get(), 128, kImageDataSize); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY)); + cfg.g_w = 128; + cfg.g_h = 128; + cfg.g_pass = AOM_RC_ONE_PASS; + cfg.g_lag_in_frames = lag_in_frames; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + + aom_image_t img; + EXPECT_EQ(&img, + aom_img_wrap(&img, AOM_IMG_FMT_I420, 128, 128, 1, img_data.get())); + + aom_codec_iter_t iter; + const aom_codec_cx_pkt_t *pkt; + int frame_count = 0; + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + + iter = nullptr; + while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u) + << "frame " << frame_count; + frame_count++; + } + + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_encode(&enc, &img, 1, 1, AOM_EFLAG_FORCE_KF)); + + iter = nullptr; + while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) { + 
ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u) + << "frame " << frame_count; + frame_count++; + } + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0)); + + iter = nullptr; + while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u) + << "frame " << frame_count; + frame_count++; + } + + EXPECT_EQ(frame_count, 2); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +TEST(ForceKeyFrameTest, OnePassModeLag0) { TestOnePassMode(0); } + +TEST(ForceKeyFrameTest, OnePassModeLag1) { TestOnePassMode(1); } + +TEST(ForceKeyFrameTest, OnePassModeLag2) { TestOnePassMode(2); } + +} // namespace diff --git a/third_party/aom/test/forced_max_frame_width_height_test.cc b/third_party/aom/test/forced_max_frame_width_height_test.cc new file mode 100644 index 0000000000..3347713c5b --- /dev/null +++ b/third_party/aom/test/forced_max_frame_width_height_test.cc @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2022, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Tests for https://crbug.com/aomedia/3326. +// +// Set cfg.g_forced_max_frame_width and cfg.g_forced_max_frame_height and +// encode two frames of increasing sizes. The second aom_codec_encode() should +// not crash or have memory errors. 
+ +#include <algorithm> +#include <memory> +#include <vector> + +#include "aom/aomcx.h" +#include "aom/aom_encoder.h" +#include "config/aom_config.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +// cfg.g_lag_in_frames must be set to 0 or 1 to allow the frame size to change, +// as required by the following check in encoder_set_config() in +// av1/av1_cx_iface.c: +// +// if (cfg->g_w != ctx->cfg.g_w || cfg->g_h != ctx->cfg.g_h) { +// if (cfg->g_lag_in_frames > 1 || cfg->g_pass != AOM_RC_ONE_PASS) +// ERROR("Cannot change width or height after initialization"); +// ... +// } + +void RunTest(unsigned int usage, unsigned int lag_in_frames, + const char *tune_metric) { + // A buffer of gray samples. Large enough for 128x128 and 256x256, YUV 4:2:0. + constexpr size_t kImageDataSize = 256 * 256 + 2 * 128 * 128; + std::unique_ptr<unsigned char[]> img_data(new unsigned char[kImageDataSize]); + ASSERT_NE(img_data, nullptr); + memset(img_data.get(), 128, kImageDataSize); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, usage)); + cfg.g_w = 128; + cfg.g_h = 128; + cfg.g_forced_max_frame_width = 256; + cfg.g_forced_max_frame_height = 256; + cfg.g_lag_in_frames = lag_in_frames; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_set_option(&enc, "tune", tune_metric)); + + aom_image_t img; + EXPECT_EQ(&img, + aom_img_wrap(&img, AOM_IMG_FMT_I420, 128, 128, 1, img_data.get())); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + + cfg.g_w = 256; + cfg.g_h = 256; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg)); + + EXPECT_EQ(&img, + aom_img_wrap(&img, AOM_IMG_FMT_I420, 256, 256, 1, img_data.get())); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0)); + 
EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +#if !CONFIG_REALTIME_ONLY + +TEST(EncodeForcedMaxFrameWidthHeight, GoodQualityLag0TunePSNR) { + RunTest(AOM_USAGE_GOOD_QUALITY, /*lag_in_frames=*/0, "psnr"); +} + +TEST(EncodeForcedMaxFrameWidthHeight, GoodQualityLag0TuneSSIM) { + RunTest(AOM_USAGE_GOOD_QUALITY, /*lag_in_frames=*/0, "ssim"); +} + +TEST(EncodeForcedMaxFrameWidthHeight, GoodQualityLag1TunePSNR) { + RunTest(AOM_USAGE_GOOD_QUALITY, /*lag_in_frames=*/1, "psnr"); +} + +TEST(EncodeForcedMaxFrameWidthHeight, GoodQualityLag1TuneSSIM) { + RunTest(AOM_USAGE_GOOD_QUALITY, /*lag_in_frames=*/1, "ssim"); +} + +void FillImageGradient(aom_image_t *image, int bit_depth) { + assert(image->range == AOM_CR_FULL_RANGE); + for (int plane = 0; plane < 3; plane++) { + const int plane_width = aom_img_plane_width(image, plane); + const int plane_height = aom_img_plane_height(image, plane); + unsigned char *row = image->planes[plane]; + const int stride = image->stride[plane]; + for (int y = 0; y < plane_height; ++y) { + for (int x = 0; x < plane_width; ++x) { + const int value = (x + y) * ((1 << bit_depth) - 1) / + std::max(1, plane_width + plane_height - 2); + assert(value >= 0 && value <= (1 << bit_depth) - 1); + if (bit_depth > 8) { + reinterpret_cast<uint16_t *>(row)[x] = static_cast<uint16_t>(value); + } else { + row[x] = static_cast<unsigned char>(value); + } + } + row += stride; + } + } +} + +TEST(EncodeForcedMaxFrameWidthHeight, DimensionDecreasing) { + constexpr int kWidth = 128; + constexpr int kHeight = 128; + constexpr size_t kBufferSize = 3 * kWidth * kHeight; + std::vector<unsigned char> buffer(kBufferSize); + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1, + buffer.data())); + img.cp = AOM_CICP_CP_UNSPECIFIED; + img.tc = AOM_CICP_TC_UNSPECIFIED; + img.mc = AOM_CICP_MC_UNSPECIFIED; + img.range = AOM_CR_FULL_RANGE; + FillImageGradient(&img, 8); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + 
aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY)); + cfg.rc_end_usage = AOM_Q; + cfg.g_profile = 0; + cfg.g_bit_depth = AOM_BITS_8; + cfg.g_input_bit_depth = 8; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + cfg.g_forced_max_frame_width = kWidth; + cfg.g_forced_max_frame_height = kHeight; + cfg.g_lag_in_frames = 1; + cfg.rc_min_quantizer = 20; + cfg.rc_max_quantizer = 40; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 30)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 6)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM)); + + // First frame + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + aom_codec_iter_t iter = nullptr; + const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0x1f0011. 
+ EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Second frame + constexpr int kWidthSmall = 64; + constexpr int kHeightSmall = 64; + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidthSmall, + kHeightSmall, 1, buffer.data())); + img.cp = AOM_CICP_CP_UNSPECIFIED; + img.tc = AOM_CICP_TC_UNSPECIFIED; + img.mc = AOM_CICP_MC_UNSPECIFIED; + img.range = AOM_CR_FULL_RANGE; + FillImageGradient(&img, 8); + cfg.g_w = kWidthSmall; + cfg.g_h = kHeightSmall; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0. + EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Flush encoder + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +#endif // !CONFIG_REALTIME_ONLY + +TEST(EncodeForcedMaxFrameWidthHeight, RealtimeLag0TunePSNR) { + RunTest(AOM_USAGE_REALTIME, /*lag_in_frames=*/0, "psnr"); +} + +TEST(EncodeForcedMaxFrameWidthHeight, RealtimeLag0TuneSSIM) { + RunTest(AOM_USAGE_REALTIME, /*lag_in_frames=*/0, "ssim"); +} + +TEST(EncodeForcedMaxFrameWidthHeight, RealtimeLag1TunePSNR) { + RunTest(AOM_USAGE_REALTIME, /*lag_in_frames=*/1, "psnr"); +} + +TEST(EncodeForcedMaxFrameWidthHeight, RealtimeLag1TuneSSIM) { + RunTest(AOM_USAGE_REALTIME, /*lag_in_frames=*/1, "ssim"); +} + +TEST(EncodeForcedMaxFrameWidthHeight, MaxFrameSizeTooBig) { + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME)); + cfg.g_w 
= 256; + cfg.g_h = 256; + cfg.g_forced_max_frame_width = 131072; + cfg.g_forced_max_frame_height = 131072; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0)); +} + +TEST(EncodeForcedMaxFrameWidthHeight, FirstFrameTooBig) { + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME)); + cfg.g_w = 258; + cfg.g_h = 256; + cfg.g_forced_max_frame_width = 256; + cfg.g_forced_max_frame_height = 256; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0)); + cfg.g_w = 256; + cfg.g_h = 258; + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, iface, &cfg, 0)); + cfg.g_w = 256; + cfg.g_h = 256; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +TEST(EncodeForcedMaxFrameWidthHeight, SecondFrameTooBig) { + // A buffer of gray samples. Large enough for 128x128 and 256x256, YUV 4:2:0. 
+ constexpr size_t kImageDataSize = 256 * 256 + 2 * 128 * 128; + std::unique_ptr<unsigned char[]> img_data(new unsigned char[kImageDataSize]); + ASSERT_NE(img_data, nullptr); + memset(img_data.get(), 128, kImageDataSize); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME)); + cfg.g_w = 128; + cfg.g_h = 128; + cfg.g_forced_max_frame_width = 255; + cfg.g_forced_max_frame_height = 256; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + + aom_image_t img; + EXPECT_EQ(&img, + aom_img_wrap(&img, AOM_IMG_FMT_I420, 128, 128, 1, img_data.get())); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + + cfg.g_w = 256; + cfg.g_h = 256; + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_config_set(&enc, &cfg)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +} // namespace diff --git a/third_party/aom/test/frame_parallel_enc_test.cc b/third_party/aom/test/frame_parallel_enc_test.cc new file mode 100644 index 0000000000..86d5ddb7d4 --- /dev/null +++ b/third_party/aom/test/frame_parallel_enc_test.cc @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <string> +#include <vector> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/md5_helper.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" + +namespace { + +#if CONFIG_FPMT_TEST && !CONFIG_REALTIME_ONLY +class AVxFrameParallelThreadEncodeTest + : public ::libaom_test::CodecTestWith3Params<int, int, int>, + public ::libaom_test::EncoderTest { + protected: + AVxFrameParallelThreadEncodeTest() + : EncoderTest(GET_PARAM(0)), encoder_initialized_(false), + set_cpu_used_(GET_PARAM(1)), tile_cols_(GET_PARAM(2)), + tile_rows_(GET_PARAM(3)) { + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.w = 1280; + cfg.h = 720; + cfg.allow_lowbitdepth = 1; + decoder_ = codec_->CreateDecoder(cfg, 0); + } + ~AVxFrameParallelThreadEncodeTest() override { delete decoder_; } + + void SetUp() override { + InitializeConfig(::libaom_test::kTwoPassGood); + cfg_.rc_end_usage = AOM_VBR; + cfg_.g_lag_in_frames = 35; + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + cfg_.rc_max_quantizer = 63; + cfg_.rc_min_quantizer = 0; + cfg_.g_threads = 16; + } + + void BeginPassHook(unsigned int /*pass*/) override { + encoder_initialized_ = false; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource * /*video*/, + ::libaom_test::Encoder *encoder) override { + if (encoder_initialized_) return; + SetTileSize(encoder); + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AV1E_SET_FP_MT, 1); + encoder->Control(AV1E_SET_FP_MT_UNIT_TEST, enable_actual_parallel_encode_); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 0); + + encoder_initialized_ = true; + } + + virtual void SetTileSize(libaom_test::Encoder *encoder) { + 
encoder->Control(AV1E_SET_TILE_COLUMNS, tile_cols_); + encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_); + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + size_enc_.push_back(pkt->data.frame.sz); + + ::libaom_test::MD5 md5_enc; + md5_enc.Add(reinterpret_cast<uint8_t *>(pkt->data.frame.buf), + pkt->data.frame.sz); + md5_enc_.push_back(md5_enc.Get()); + + const aom_codec_err_t res = decoder_->DecodeFrame( + reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz); + if (res != AOM_CODEC_OK) { + abort_ = true; + ASSERT_EQ(AOM_CODEC_OK, res); + } + const aom_image_t *img = decoder_->GetDxData().Next(); + + if (img) { + ::libaom_test::MD5 md5_res; + md5_res.Add(img); + md5_dec_.push_back(md5_res.Get()); + } + } + + void DoTest(::libaom_test::VideoSource *input_video) { + /* This is the actual parallel encode of frames using multiple cpis. + * The parallel frames are independently encoded. + * Threads are distributed among the parallel frames whereas non-parallel + * frames use all the threads. Example: for 8 threads, in case of 4 frames + * in a parallel encode set, each frame gets 2 threads. In case of 3 frames + * in a parallel encode set, threads are distributed as 2, 3 ,3. + */ + enable_actual_parallel_encode_ = 1; + ASSERT_NO_FATAL_FAILURE(RunLoop(input_video)); + std::vector<size_t> enc_stream_fpmt_size; + std::vector<std::string> enc_stream_fpmt; + std::vector<std::string> dec_stream_fpmt; + enc_stream_fpmt_size = size_enc_; + enc_stream_fpmt = md5_enc_; + dec_stream_fpmt = md5_dec_; + size_enc_.clear(); + md5_enc_.clear(); + md5_dec_.clear(); + + /* This is the simulation of parallel encode of frames using single cpi. + * In simulation, it should be ensured to have no dependency across frames + * (similar to parallel encode). + * Each frame uses all the threads configured. 
+ */ + enable_actual_parallel_encode_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(input_video)); + std::vector<size_t> enc_stream_sim_size; + std::vector<std::string> enc_stream_sim; + std::vector<std::string> dec_stream_sim; + enc_stream_sim_size = size_enc_; + enc_stream_sim = md5_enc_; + dec_stream_sim = md5_dec_; + size_enc_.clear(); + md5_enc_.clear(); + md5_dec_.clear(); + + // Check that the vectors are equal. + ASSERT_EQ(enc_stream_sim_size, enc_stream_fpmt_size); + ASSERT_EQ(enc_stream_sim, enc_stream_fpmt); + ASSERT_EQ(dec_stream_sim, dec_stream_fpmt); + } + + bool encoder_initialized_; + int set_cpu_used_; + int tile_cols_; + int tile_rows_; + int enable_actual_parallel_encode_; + ::libaom_test::Decoder *decoder_; + std::vector<size_t> size_enc_; + std::vector<std::string> md5_enc_; + std::vector<std::string> md5_dec_; +}; + +class AVxFrameParallelThreadEncodeHDResTestLarge + : public AVxFrameParallelThreadEncodeTest {}; + +TEST_P(AVxFrameParallelThreadEncodeHDResTestLarge, + FrameParallelThreadEncodeTest) { + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + cfg_.rc_target_bitrate = 500; + DoTest(&video); +} + +class AVxFrameParallelThreadEncodeLowResTestLarge + : public AVxFrameParallelThreadEncodeTest {}; + +TEST_P(AVxFrameParallelThreadEncodeLowResTestLarge, + FrameParallelThreadEncodeTest) { + ::libaom_test::YUVVideoSource video("hantro_collage_w352h288.yuv", + AOM_IMG_FMT_I420, 352, 288, 30, 1, 0, 60); + cfg_.rc_target_bitrate = 200; + DoTest(&video); +} + +class AVxFrameParallelThreadEncodeLowResTest + : public AVxFrameParallelThreadEncodeTest {}; + +TEST_P(AVxFrameParallelThreadEncodeLowResTest, FrameParallelThreadEncodeTest) { + ::libaom_test::YUVVideoSource video("hantro_collage_w352h288.yuv", + AOM_IMG_FMT_I420, 352, 288, 30, 1, 0, 60); + cfg_.rc_target_bitrate = 200; + DoTest(&video); +} + +AV1_INSTANTIATE_TEST_SUITE(AVxFrameParallelThreadEncodeHDResTestLarge, + ::testing::Values(2, 3, 4, 5, 6), + ::testing::Values(0, 1, 2), 
::testing::Values(0, 1)); + +AV1_INSTANTIATE_TEST_SUITE(AVxFrameParallelThreadEncodeLowResTestLarge, + ::testing::Values(2, 3), ::testing::Values(0, 1, 2), + ::testing::Values(0, 1)); + +AV1_INSTANTIATE_TEST_SUITE(AVxFrameParallelThreadEncodeLowResTest, + ::testing::Values(4, 5, 6), ::testing::Values(1), + ::testing::Values(0)); +#endif // CONFIG_FPMT_TEST && !CONFIG_REALTIME_ONLY + +} // namespace diff --git a/third_party/aom/test/frame_size_tests.cc b/third_party/aom/test/frame_size_tests.cc new file mode 100644 index 0000000000..ea8cf47ab8 --- /dev/null +++ b/third_party/aom/test/frame_size_tests.cc @@ -0,0 +1,388 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <array> +#include <memory> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/video_source.h" +#include "test/util.h" + +namespace { + +class AV1FrameSizeTests : public ::testing::Test, + public ::libaom_test::EncoderTest { + protected: + AV1FrameSizeTests() + : EncoderTest(&::libaom_test::kAV1), expected_res_(AOM_CODEC_OK) {} + ~AV1FrameSizeTests() override = default; + + void SetUp() override { InitializeConfig(::libaom_test::kRealTime); } + + bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) override { + EXPECT_EQ(expected_res_, res_dec) << decoder->DecodeError(); + return !::testing::Test::HasFailure(); + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 7); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + + int expected_res_; +}; + +#if CONFIG_SIZE_LIMIT +// TODO(Casey.Smalley@arm.com) fails due to newer bounds checks that get caught +// before the assert below added in ebc2714d71a834fc32a19eef0a81f51fbc47db01 +TEST_F(AV1FrameSizeTests, DISABLED_TestInvalidSizes) { + ::libaom_test::RandomVideoSource video; + + video.SetSize(DECODE_WIDTH_LIMIT + 16, DECODE_HEIGHT_LIMIT + 16); + video.set_limit(2); + expected_res_ = AOM_CODEC_CORRUPT_FRAME; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +// TODO(Casey.Smalley@arm.com) similar to the above test, needs to be +// updated for the new rejection case +TEST_F(AV1FrameSizeTests, DISABLED_LargeValidSizes) { + ::libaom_test::RandomVideoSource video; + + video.SetSize(DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT); + video.set_limit(2); + expected_res_ = AOM_CODEC_OK; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} +#endif + +TEST_F(AV1FrameSizeTests, OneByOneVideo) { + 
::libaom_test::RandomVideoSource video; + + video.SetSize(1, 1); + video.set_limit(2); + expected_res_ = AOM_CODEC_OK; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +// Parameters: AOM_USAGE_*, aom_rc_mode, cpu-used. +class AV1ResolutionChange + : public testing::TestWithParam<std::tuple<int, aom_rc_mode, int>> { + public: + AV1ResolutionChange() + : usage_(std::get<0>(GetParam())), rc_mode_(std::get<1>(GetParam())), + cpu_used_(std::get<2>(GetParam())) {} + AV1ResolutionChange(const AV1ResolutionChange &) = delete; + AV1ResolutionChange &operator=(const AV1ResolutionChange &) = delete; + ~AV1ResolutionChange() override = default; + + protected: + int usage_; + aom_rc_mode rc_mode_; + int cpu_used_; +}; + +TEST_P(AV1ResolutionChange, InvalidRefSize) { + struct FrameSize { + unsigned int width; + unsigned int height; + }; + static constexpr std::array<FrameSize, 3> kFrameSizes = { { + { 1768, 200 }, + { 50, 200 }, + { 850, 200 }, + } }; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage_), AOM_CODEC_OK); + + // Resolution changes are only permitted with one pass encoding with no lag. 
+ cfg.g_pass = AOM_RC_ONE_PASS; + cfg.g_lag_in_frames = 0; + cfg.rc_end_usage = rc_mode_; + + aom_codec_ctx_t ctx; + EXPECT_EQ(aom_codec_enc_init(&ctx, iface, &cfg, 0), AOM_CODEC_OK); + std::unique_ptr<aom_codec_ctx_t, decltype(&aom_codec_destroy)> enc( + &ctx, &aom_codec_destroy); + EXPECT_EQ(aom_codec_control(enc.get(), AOME_SET_CPUUSED, cpu_used_), + AOM_CODEC_OK); + + size_t frame_count = 0; + ::libaom_test::RandomVideoSource video; + video.Begin(); + constexpr int kNumFramesPerResolution = 2; + for (const auto &frame_size : kFrameSizes) { + cfg.g_w = frame_size.width; + cfg.g_h = frame_size.height; + EXPECT_EQ(aom_codec_enc_config_set(enc.get(), &cfg), AOM_CODEC_OK); + video.SetSize(cfg.g_w, cfg.g_h); + + aom_codec_iter_t iter; + const aom_codec_cx_pkt_t *pkt; + + for (int i = 0; i < kNumFramesPerResolution; ++i) { + video.Next(); // SetSize() does not call FillFrame(). + EXPECT_EQ(aom_codec_encode(enc.get(), video.img(), video.pts(), + video.duration(), /*flags=*/0), + AOM_CODEC_OK); + + iter = nullptr; + while ((pkt = aom_codec_get_cx_data(enc.get(), &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // The frame following a resolution change should be a keyframe as the + // change is too extreme to allow previous references to be used. 
+ if (i == 0 || usage_ == AOM_USAGE_ALL_INTRA) { + EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u) + << "frame " << frame_count; + } + frame_count++; + } + } + } + + EXPECT_EQ(frame_count, kNumFramesPerResolution * kFrameSizes.size()); +} + +TEST_P(AV1ResolutionChange, RandomInput) { + struct FrameSize { + unsigned int width; + unsigned int height; + }; + static constexpr std::array<FrameSize, 4> kFrameSizes = { { + { 50, 200 }, + { 100, 200 }, + { 100, 300 }, + { 200, 400 }, + } }; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage_), AOM_CODEC_OK); + + // Resolution changes are only permitted with one pass encoding with no lag. + cfg.g_pass = AOM_RC_ONE_PASS; + cfg.g_lag_in_frames = 0; + cfg.rc_end_usage = rc_mode_; + // For random input source, if max frame sizes are not set, the first encoded + // frame size will be locked as the max frame size, and the encoder will + // identify it as unsupported bitstream. + unsigned int max_width = cfg.g_w; // default frame width + unsigned int max_height = cfg.g_h; // default frame height + for (const auto &frame_size : kFrameSizes) { + max_width = frame_size.width > max_width ? frame_size.width : max_width; + max_height = + frame_size.height > max_height ? 
frame_size.height : max_height; + } + cfg.g_forced_max_frame_width = max_width; + cfg.g_forced_max_frame_height = max_height; + + aom_codec_ctx_t ctx; + EXPECT_EQ(aom_codec_enc_init(&ctx, iface, &cfg, 0), AOM_CODEC_OK); + std::unique_ptr<aom_codec_ctx_t, decltype(&aom_codec_destroy)> enc( + &ctx, &aom_codec_destroy); + EXPECT_EQ(aom_codec_control(enc.get(), AOME_SET_CPUUSED, cpu_used_), + AOM_CODEC_OK); + + size_t frame_count = 0; + ::libaom_test::RandomVideoSource video; + video.Begin(); + constexpr int kNumFramesPerResolution = 2; + for (const auto &frame_size : kFrameSizes) { + cfg.g_w = frame_size.width; + cfg.g_h = frame_size.height; + EXPECT_EQ(aom_codec_enc_config_set(enc.get(), &cfg), AOM_CODEC_OK); + video.SetSize(cfg.g_w, cfg.g_h); + + aom_codec_iter_t iter; + const aom_codec_cx_pkt_t *pkt; + + for (int i = 0; i < kNumFramesPerResolution; ++i) { + video.Next(); // SetSize() does not call FillFrame(). + EXPECT_EQ(aom_codec_encode(enc.get(), video.img(), video.pts(), + video.duration(), /*flags=*/0), + AOM_CODEC_OK); + + iter = nullptr; + while ((pkt = aom_codec_get_cx_data(enc.get(), &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // The frame following a resolution change should be a keyframe as the + // change is too extreme to allow previous references to be used. 
+ if (i == 0 || usage_ == AOM_USAGE_ALL_INTRA) { + EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u) + << "frame " << frame_count; + } + frame_count++; + } + } + } + + EXPECT_EQ(frame_count, kNumFramesPerResolution * kFrameSizes.size()); +} + +TEST_P(AV1ResolutionChange, InvalidInputSize) { + struct FrameSize { + unsigned int width; + unsigned int height; + }; + static constexpr std::array<FrameSize, 3> kFrameSizes = { { + { 1768, 0 }, + { 0, 200 }, + { 850, 200 }, + } }; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage_), AOM_CODEC_OK); + + // Resolution changes are only permitted with one pass encoding with no lag. + cfg.g_pass = AOM_RC_ONE_PASS; + cfg.g_lag_in_frames = 0; + cfg.rc_end_usage = rc_mode_; + + aom_codec_ctx_t ctx; + EXPECT_EQ(aom_codec_enc_init(&ctx, iface, &cfg, 0), AOM_CODEC_OK); + std::unique_ptr<aom_codec_ctx_t, decltype(&aom_codec_destroy)> enc( + &ctx, &aom_codec_destroy); + EXPECT_EQ(aom_codec_control(enc.get(), AOME_SET_CPUUSED, cpu_used_), + AOM_CODEC_OK); + + int frame_count = 0; + ::libaom_test::RandomVideoSource video; + video.Begin(); + constexpr int kNumFramesPerResolution = 2; + for (const auto &frame_size : kFrameSizes) { + cfg.g_w = frame_size.width; + cfg.g_h = frame_size.height; + if (cfg.g_w < 1 || cfg.g_w > 65536 || cfg.g_h < 1 || cfg.g_h > 65536) { + EXPECT_EQ(aom_codec_enc_config_set(enc.get(), &cfg), + AOM_CODEC_INVALID_PARAM); + continue; + } + + EXPECT_EQ(aom_codec_enc_config_set(enc.get(), &cfg), AOM_CODEC_OK); + video.SetSize(cfg.g_w, cfg.g_h); + + aom_codec_iter_t iter; + const aom_codec_cx_pkt_t *pkt; + + for (int i = 0; i < kNumFramesPerResolution; ++i) { + video.Next(); // SetSize() does not call FillFrame(). 
+ EXPECT_EQ(aom_codec_encode(enc.get(), video.img(), video.pts(), + video.duration(), /*flags=*/0), + AOM_CODEC_OK); + + iter = nullptr; + while ((pkt = aom_codec_get_cx_data(enc.get(), &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // The frame following a resolution change should be a keyframe as the + // change is too extreme to allow previous references to be used. + if (i == 0 || usage_ == AOM_USAGE_ALL_INTRA) { + EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u) + << "frame " << frame_count; + } + frame_count++; + } + } + } + + EXPECT_EQ(frame_count, 2); +} + +INSTANTIATE_TEST_SUITE_P( + Realtime, AV1ResolutionChange, + ::testing::Combine(::testing::Values(AOM_USAGE_REALTIME), + ::testing::Values(AOM_VBR, AOM_CBR), + ::testing::Range(6, 11))); + +#if !CONFIG_REALTIME_ONLY +INSTANTIATE_TEST_SUITE_P( + GoodQuality, AV1ResolutionChange, + ::testing::Combine(::testing::Values(AOM_USAGE_GOOD_QUALITY), + ::testing::Values(AOM_VBR, AOM_CBR, AOM_CQ, AOM_Q), + ::testing::Range(2, 6))); +INSTANTIATE_TEST_SUITE_P( + GoodQualityLarge, AV1ResolutionChange, + ::testing::Combine(::testing::Values(AOM_USAGE_GOOD_QUALITY), + ::testing::Values(AOM_VBR, AOM_CBR, AOM_CQ, AOM_Q), + ::testing::Range(0, 2))); +INSTANTIATE_TEST_SUITE_P( + AllIntra, AV1ResolutionChange, + ::testing::Combine(::testing::Values(AOM_USAGE_ALL_INTRA), + ::testing::Values(AOM_Q), ::testing::Range(6, 10))); + +typedef struct { + unsigned int width; + unsigned int height; +} FrameSizeParam; + +const FrameSizeParam FrameSizeTestParams[] = { { 96, 96 }, { 176, 144 } }; + +// This unit test is used to validate the allocated size of compressed data +// (ctx->cx_data) buffer, by feeding pseudo random input to the encoder in +// lossless encoding mode. +// +// If compressed data buffer is not large enough, the av1_get_compressed_data() +// call in av1/av1_cx_iface.c will overflow the buffer. 
+class AV1LosslessFrameSizeTests + : public ::libaom_test::CodecTestWith2Params<FrameSizeParam, + ::libaom_test::TestMode>, + public ::libaom_test::EncoderTest { + protected: + AV1LosslessFrameSizeTests() + : EncoderTest(GET_PARAM(0)), frame_size_param_(GET_PARAM(1)), + encoding_mode_(GET_PARAM(2)) {} + ~AV1LosslessFrameSizeTests() override = default; + + void SetUp() override { InitializeConfig(encoding_mode_); } + + bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) override { + EXPECT_EQ(expected_res_, res_dec) << decoder->DecodeError(); + return !::testing::Test::HasFailure(); + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 6); + encoder->Control(AV1E_SET_LOSSLESS, 1); + } + } + + const FrameSizeParam frame_size_param_; + const ::libaom_test::TestMode encoding_mode_; + int expected_res_; +}; + +TEST_P(AV1LosslessFrameSizeTests, LosslessEncode) { + ::libaom_test::RandomVideoSource video; + + video.SetSize(frame_size_param_.width, frame_size_param_.height); + video.set_limit(10); + expected_res_ = AOM_CODEC_OK; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +AV1_INSTANTIATE_TEST_SUITE(AV1LosslessFrameSizeTests, + ::testing::ValuesIn(FrameSizeTestParams), + testing::Values(::libaom_test::kAllIntra)); +#endif // !CONFIG_REALTIME_ONLY + +} // namespace diff --git a/third_party/aom/test/function_equivalence_test.h b/third_party/aom/test/function_equivalence_test.h new file mode 100644 index 0000000000..2268b9f2ad --- /dev/null +++ b/third_party/aom/test/function_equivalence_test.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AOM_TEST_FUNCTION_EQUIVALENCE_TEST_H_ +#define AOM_TEST_FUNCTION_EQUIVALENCE_TEST_H_ + +#include <ostream> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/util.h" + +using libaom_test::ACMRandom; + +namespace libaom_test { +// Base class for tests that compare 2 implementations of the same function +// for equivalence. The template parameter should be a pointer to the type of +// function that is being tested. +// +// The test takes a struct 'FuncParam' encapsulating 3 parameters: +// - Pointer to reference function +// - Pointer to tested function +// - Integer bit depth (defaults to 0). +// +// These values are then accessible in the tests as members of params_: +// params_.ref_func, params_.tst_func, and params_.bit_depth. 
+// + +template <typename T> +struct FuncParam { + FuncParam(T ref = nullptr, T tst = nullptr, int depth = 0) + : ref_func(ref), tst_func(tst), bit_depth(depth) {} + T ref_func; + T tst_func; + int bit_depth; +}; + +template <typename T> +std::ostream &operator<<(std::ostream &os, const FuncParam<T> &p) { + return os << "bit_depth:" << p.bit_depth + << " function:" << reinterpret_cast<const void *>(p.ref_func) + << " function:" << reinterpret_cast<const void *>(p.tst_func); +} + +template <typename T> +class FunctionEquivalenceTest : public ::testing::TestWithParam<FuncParam<T> > { + public: + FunctionEquivalenceTest() : rng_(ACMRandom::DeterministicSeed()) {} + + ~FunctionEquivalenceTest() override = default; + + void SetUp() override { params_ = this->GetParam(); } + + protected: + ACMRandom rng_; + FuncParam<T> params_; +}; + +} // namespace libaom_test +#endif // AOM_TEST_FUNCTION_EQUIVALENCE_TEST_H_ diff --git a/third_party/aom/test/fwht4x4_test.cc b/third_party/aom/test/fwht4x4_test.cc new file mode 100644 index 0000000000..bb9e218f6f --- /dev/null +++ b/third_party/aom/test/fwht4x4_test.cc @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <tuple> + +#include "aom_dsp/aom_dsp_common.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/av1_rtcd.h" +#include "config/aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/transform_test_base.h" +#include "test/util.h" +#include "av1/common/entropy.h" +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_ports/mem.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride); +typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride); + +using libaom_test::FhtFunc; + +typedef std::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t, int, FdctFunc> + Dct4x4Param; + +void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride, + TxfmParam * /*txfm_param*/) { + av1_fwht4x4_c(in, out, stride); +} + +void iwht4x4_10_c(const tran_low_t *in, uint8_t *out, int stride) { + av1_highbd_iwht4x4_16_add_c(in, out, stride, 10); +} + +void iwht4x4_12_c(const tran_low_t *in, uint8_t *out, int stride) { + av1_highbd_iwht4x4_16_add_c(in, out, stride, 12); +} + +#if HAVE_SSE4_1 + +void iwht4x4_10_sse4_1(const tran_low_t *in, uint8_t *out, int stride) { + av1_highbd_iwht4x4_16_add_sse4_1(in, out, stride, 10); +} + +void iwht4x4_12_sse4_1(const tran_low_t *in, uint8_t *out, int stride) { + av1_highbd_iwht4x4_16_add_sse4_1(in, out, stride, 12); +} + +#endif + +class Trans4x4WHT : public libaom_test::TransformTestBase<tran_low_t>, + public ::testing::TestWithParam<Dct4x4Param> { + public: + ~Trans4x4WHT() override = default; + + void SetUp() override { + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + pitch_ = 4; + height_ = 4; + fwd_txfm_ref = fwht4x4_ref; + bit_depth_ = GET_PARAM(3); + mask_ = (1 << bit_depth_) - 1; + num_coeffs_ = GET_PARAM(4); + fwd_txfm_c_ = GET_PARAM(5); + } + + protected: + void 
RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) override { + fwd_txfm_(in, out, stride); + } + void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) override { + inv_txfm_(out, dst, stride); + } + void RunSpeedTest() { + if (!fwd_txfm_c_) { + GTEST_SKIP(); + } else { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 10; + const int numIter = 5000; + + int c_sum_time = 0; + int simd_sum_time = 0; + + int stride = 96; + + int16_t *input_block = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * stride * height_)); + ASSERT_NE(input_block, nullptr); + tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>( + aom_memalign(16, sizeof(output_ref_block[0]) * num_coeffs_)); + ASSERT_NE(output_ref_block, nullptr); + tran_low_t *output_block = reinterpret_cast<tran_low_t *>( + aom_memalign(16, sizeof(output_block[0]) * num_coeffs_)); + ASSERT_NE(output_block, nullptr); + + for (int i = 0; i < count_test_block; ++i) { + for (int j = 0; j < height_; ++j) { + for (int k = 0; k < pitch_; ++k) { + int in_idx = j * stride + k; + int out_idx = j * pitch_ + k; + input_block[in_idx] = + (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); + if (bit_depth_ == AOM_BITS_8) { + output_block[out_idx] = output_ref_block[out_idx] = rnd.Rand8(); + } else { + output_block[out_idx] = output_ref_block[out_idx] = + rnd.Rand16() & mask_; + } + } + } + + aom_usec_timer c_timer_; + aom_usec_timer_start(&c_timer_); + for (int iter = 0; iter < numIter; iter++) { + API_REGISTER_STATE_CHECK( + fwd_txfm_c_(input_block, output_ref_block, stride)); + } + aom_usec_timer_mark(&c_timer_); + + aom_usec_timer simd_timer_; + aom_usec_timer_start(&simd_timer_); + + for (int iter = 0; iter < numIter; iter++) { + API_REGISTER_STATE_CHECK( + fwd_txfm_(input_block, output_block, stride)); + } + aom_usec_timer_mark(&simd_timer_); + + c_sum_time += static_cast<int>(aom_usec_timer_elapsed(&c_timer_)); + simd_sum_time += 
static_cast<int>(aom_usec_timer_elapsed(&simd_timer_)); + + // The minimum quant value is 4. + for (int j = 0; j < height_; ++j) { + for (int k = 0; k < pitch_; ++k) { + int out_idx = j * pitch_ + k; + ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx]) + << "Error: not bit-exact result at index: " << out_idx + << " at test block: " << i; + } + } + } + + printf( + "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time, + simd_sum_time, + (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time))); + + aom_free(input_block); + aom_free(output_ref_block); + aom_free(output_block); + } + } + + FdctFunc fwd_txfm_; + IdctFunc inv_txfm_; + + FdctFunc fwd_txfm_c_; // C version of forward transform for speed test. +}; + +TEST_P(Trans4x4WHT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); } + +TEST_P(Trans4x4WHT, CoeffCheck) { RunCoeffCheck(); } + +TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); } + +TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); } + +TEST_P(Trans4x4WHT, DISABLED_Speed) { RunSpeedTest(); } + +using std::make_tuple; + +INSTANTIATE_TEST_SUITE_P( + C, Trans4x4WHT, + ::testing::Values(make_tuple(&av1_fwht4x4_c, &iwht4x4_10_c, DCT_DCT, + AOM_BITS_10, 16, + static_cast<FdctFunc>(nullptr)), + make_tuple(&av1_fwht4x4_c, &iwht4x4_12_c, DCT_DCT, + AOM_BITS_12, 16, + static_cast<FdctFunc>(nullptr)))); + +#if HAVE_SSE4_1 + +INSTANTIATE_TEST_SUITE_P( + SSE4_1, Trans4x4WHT, + ::testing::Values(make_tuple(&av1_fwht4x4_sse4_1, &iwht4x4_10_sse4_1, + DCT_DCT, AOM_BITS_10, 16, + static_cast<FdctFunc>(nullptr)), + make_tuple(&av1_fwht4x4_sse4_1, &iwht4x4_12_sse4_1, + DCT_DCT, AOM_BITS_12, 16, + static_cast<FdctFunc>(nullptr)))); + +#endif // HAVE_SSE4_1 + +#if HAVE_NEON + +INSTANTIATE_TEST_SUITE_P( + NEON, Trans4x4WHT, + ::testing::Values(make_tuple(&av1_fwht4x4_neon, &iwht4x4_10_c, DCT_DCT, + AOM_BITS_10, 16, &av1_fwht4x4_c), + make_tuple(&av1_fwht4x4_neon, &iwht4x4_12_c, DCT_DCT, + AOM_BITS_12, 16, &av1_fwht4x4_c))); + +#endif // 
// One GF pyramid height configuration together with the PSNR value the
// encode is compared against.
struct GFPyrHeightTestParam {
  int gf_min_pyr_height;
  int gf_max_pyr_height;
  double psnr_thresh;
};

// Every (min, max) pyramid height pair with 0 <= min <= max <= 5, grouped by
// minimum height.
static const GFPyrHeightTestParam kTestParams[] = {
  // gf_min_pyr_height = 0
  { 0, 0, 32.30 },
  { 0, 1, 33.90 },
  { 0, 2, 34.00 },
  { 0, 3, 34.20 },
  { 0, 4, 34.30 },
  { 0, 5, 34.35 },
  // gf_min_pyr_height = 1
  { 1, 1, 33.90 },
  { 1, 2, 34.00 },
  { 1, 3, 34.20 },
  { 1, 4, 34.30 },
  { 1, 5, 34.35 },
  // gf_min_pyr_height = 2
  { 2, 2, 34.00 },
  { 2, 3, 34.20 },
  { 2, 4, 34.30 },
  { 2, 5, 34.35 },
  // gf_min_pyr_height = 3
  { 3, 3, 34.20 },
  { 3, 4, 34.30 },
  { 3, 5, 34.35 },
  // gf_min_pyr_height = 4
  { 4, 4, 34.30 },
  { 4, 5, 34.35 },
  // gf_min_pyr_height = 5
  { 5, 5, 34.35 },
};
To avoid that, we +// provide our own function to print the struct. +// This also makes '--gtest_list_tests' output more understandable. +std::ostream &operator<<(std::ostream &os, const GFPyrHeightTestParam &p) { + os << "GFPyrHeightTestParam { " + << "gf_min_pyr_height = " << p.gf_min_pyr_height << ", " + << "gf_max_pyr_height = " << p.gf_max_pyr_height << ", " + << "psnr_thresh = " << p.psnr_thresh << " }"; + return os; +} + +// Params: encoding mode, rate control mode and GFPyrHeightTestParam object. +class GFPyrHeightTest + : public ::libaom_test::CodecTestWith3Params< + libaom_test::TestMode, aom_rc_mode, GFPyrHeightTestParam>, + public ::libaom_test::EncoderTest { + protected: + GFPyrHeightTest() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + rc_mode_(GET_PARAM(2)) { + gf_min_pyr_height_ = GET_PARAM(3).gf_min_pyr_height; + gf_max_pyr_height_ = GET_PARAM(3).gf_max_pyr_height; + psnr_threshold_ = GET_PARAM(3).psnr_thresh; + } + ~GFPyrHeightTest() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cpu_used_ = 4; + cfg_.rc_end_usage = rc_mode_; + if (rc_mode_ == AOM_VBR) { + cfg_.rc_target_bitrate = 200; + } + cfg_.g_lag_in_frames = 19; + cfg_.g_threads = 0; + init_flags_ = AOM_CODEC_USE_PSNR; + } + + void BeginPassHook(unsigned int) override { + psnr_ = 0.0; + nframes_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + if (rc_mode_ == AOM_Q) { + encoder->Control(AOME_SET_CQ_LEVEL, 32); + } + if (encoding_mode_ != ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 
5); + } + encoder->Control(AV1E_SET_GF_MIN_PYRAMID_HEIGHT, gf_min_pyr_height_); + encoder->Control(AV1E_SET_GF_MAX_PYRAMID_HEIGHT, gf_max_pyr_height_); + } + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + double GetPsnrThreshold() { return psnr_threshold_; } + + ::libaom_test::TestMode encoding_mode_; + aom_rc_mode rc_mode_; + double psnr_threshold_; + int gf_min_pyr_height_; + int gf_max_pyr_height_; + int cpu_used_; + int nframes_; + double psnr_; +}; + +TEST_P(GFPyrHeightTest, EncodeAndVerifyPSNR) { + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, 32); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + EXPECT_GT(GetAveragePsnr(), GetPsnrThreshold()) + << "GF Min Pyramid Height = " << gf_min_pyr_height_ << ", " + << "GF Max Pyramid Height = " << gf_max_pyr_height_; +} + +AV1_INSTANTIATE_TEST_SUITE(GFPyrHeightTest, NONREALTIME_TEST_MODES, + ::testing::Values(AOM_Q, AOM_VBR), + ::testing::ValuesIn(kTestParams)); +} // namespace diff --git a/third_party/aom/test/gviz_api.py b/third_party/aom/test/gviz_api.py new file mode 100755 index 0000000000..d3a443dabf --- /dev/null +++ b/third_party/aom/test/gviz_api.py @@ -0,0 +1,1087 @@ +#!/usr/bin/python +# +# Copyright (c) 2016, Alliance for Open Media. All rights reserved +# +# This source code is subject to the terms of the BSD 2 Clause License and +# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +# was not distributed with this source code in the LICENSE file, you can +# obtain it at www.aomedia.org/license/software. If the Alliance for Open +# Media Patent License 1.0 was not distributed with this source code in the +# PATENTS file, you can obtain it at www.aomedia.org/license/patent. +# + +"""Converts Python data into data for Google Visualization API clients. 
class DataTableException(Exception):
  """Error type raised by DataTable for malformed schemas or data."""


class DataTableJSONEncoder(json.JSONEncoder):
  """JSON encoder that understands date, time and datetime objects.

  Output uses compact separators ("," and ":") and does not force ASCII.
  """

  def __init__(self):
    json.JSONEncoder.__init__(self,
                              separators=(",", ":"),
                              ensure_ascii=False)

  def default(self, o):
    """Encodes the date/time types the base encoder cannot handle.

    Args:
      o: The object the base encoder could not serialize.

    Returns:
      A "Date(...)" string for datetime and date objects (months are
      zero-based; milliseconds are appended only when the microsecond field is
      non-zero), or an [hour, minute, second] list for time objects. Any other
      object is passed on to json.JSONEncoder.default().
    """
    # datetime must be tested before date: datetime is a subclass of date.
    if isinstance(o, datetime.datetime):
      fields = (o.year, o.month - 1, o.day, o.hour, o.minute, o.second)
      if o.microsecond == 0:
        # No sub-second part: leave it out to keep the output smaller.
        return "Date(%d,%d,%d,%d,%d,%d)" % fields
      return "Date(%d,%d,%d,%d,%d,%d,%d)" % (fields + (o.microsecond / 1000,))
    if isinstance(o, datetime.date):
      return "Date(%d,%d,%d)" % (o.year, o.month - 1, o.day)
    if isinstance(o, datetime.time):
      return [o.hour, o.minute, o.second]
    return super(DataTableJSONEncoder, self).default(o)
+ + You can add new data one or more rows at a time. All data added to an + instantiated DataTable must conform to the schema passed in to __init__(). + + You can reorder the columns in the output table, and also specify row sorting + order by column. The default column order is according to the original + table_description parameter. Default row sort order is ascending, by column + 1 values. For a dictionary, we sort the keys for order. + + The data and the table_description are closely tied, as described here: + + The table schema is defined in the class constructor's table_description + parameter. The user defines each column using a tuple of + (id[, type[, label[, custom_properties]]]). The default value for type is + string, label is the same as ID if not specified, and custom properties is + an empty dictionary if not specified. + + table_description is a dictionary or list, containing one or more column + descriptor tuples, nested dictionaries, and lists. Each dictionary key, list + element, or dictionary element must eventually be defined as + a column description tuple. Here's an example of a dictionary where the key + is a tuple, and the value is a list of two tuples: + {('a', 'number'): [('b', 'number'), ('c', 'string')]} + + This flexibility in data entry enables you to build and manipulate your data + in a Python structure that makes sense for your program. + + Add data to the table using the same nested design as the table's + table_description, replacing column descriptor tuples with cell data, and + each row is an element in the top level collection. 
This will be a bit + clearer after you look at the following examples showing the + table_description, matching data, and the resulting table: + + Columns as list of tuples [col1, col2, col3] + table_description: [('a', 'number'), ('b', 'string')] + AppendData( [[1, 'z'], [2, 'w'], [4, 'o'], [5, 'k']] ) + Table: + a b <--- these are column ids/labels + 1 z + 2 w + 4 o + 5 k + + Dictionary of columns, where key is a column, and value is a list of + columns {col1: [col2, col3]} + table_description: {('a', 'number'): [('b', 'number'), ('c', 'string')]} + AppendData( data: {1: [2, 'z'], 3: [4, 'w']} + Table: + a b c + 1 2 z + 3 4 w + + Dictionary where key is a column, and the value is itself a dictionary of + columns {col1: {col2, col3}} + table_description: {('a', 'number'): {'b': 'number', 'c': 'string'}} + AppendData( data: {1: {'b': 2, 'c': 'z'}, 3: {'b': 4, 'c': 'w'}} + Table: + a b c + 1 2 z + 3 4 w + """ + + def __init__(self, table_description, data=None, custom_properties=None): + """Initialize the data table from a table schema and (optionally) data. + + See the class documentation for more information on table schema and data + values. + + Args: + table_description: A table schema, following one of the formats described + in TableDescriptionParser(). Schemas describe the + column names, data types, and labels. See + TableDescriptionParser() for acceptable formats. + data: Optional. If given, fills the table with the given data. The data + structure must be consistent with schema in table_description. See + the class documentation for more information on acceptable data. You + can add data later by calling AppendData(). + custom_properties: Optional. A dictionary from string to string that + goes into the table's custom properties. This can be + later changed by changing self.custom_properties. + + Raises: + DataTableException: Raised if the data and the description did not match, + or did not use the supported formats. 
+ """ + self.__columns = self.TableDescriptionParser(table_description) + self.__data = [] + self.custom_properties = {} + if custom_properties is not None: + self.custom_properties = custom_properties + if data: + self.LoadData(data) + + @staticmethod + def CoerceValue(value, value_type): + """Coerces a single value into the type expected for its column. + + Internal helper method. + + Args: + value: The value which should be converted + value_type: One of "string", "number", "boolean", "date", "datetime" or + "timeofday". + + Returns: + An item of the Python type appropriate to the given value_type. Strings + are also converted to Unicode using UTF-8 encoding if necessary. + If a tuple is given, it should be in one of the following forms: + - (value, formatted value) + - (value, formatted value, custom properties) + where the formatted value is a string, and custom properties is a + dictionary of the custom properties for this cell. + To specify custom properties without specifying formatted value, one can + pass None as the formatted value. + One can also have a null-valued cell with formatted value and/or custom + properties by specifying None for the value. + This method ignores the custom properties except for checking that it is a + dictionary. The custom properties are handled in the ToJSon and ToJSCode + methods. + The real type of the given value is not strictly checked. For example, + any type can be used for string - as we simply take its str( ) and for + boolean value we just check "if value". + Examples: + CoerceValue(None, "string") returns None + CoerceValue((5, "5$"), "number") returns (5, "5$") + CoerceValue(100, "string") returns "100" + CoerceValue(0, "boolean") returns False + + Raises: + DataTableException: The value and type did not match in a not-recoverable + way, for example given value 'abc' for type 'number'. 
+ """ + if isinstance(value, tuple): + # In case of a tuple, we run the same function on the value itself and + # add the formatted value. + if (len(value) not in [2, 3] or + (len(value) == 3 and not isinstance(value[2], dict))): + raise DataTableException("Wrong format for value and formatting - %s." % + str(value)) + if not isinstance(value[1], types.StringTypes + (types.NoneType,)): + raise DataTableException("Formatted value is not string, given %s." % + type(value[1])) + js_value = DataTable.CoerceValue(value[0], value_type) + return (js_value,) + value[1:] + + t_value = type(value) + if value is None: + return value + if value_type == "boolean": + return bool(value) + + elif value_type == "number": + if isinstance(value, (int, long, float)): + return value + raise DataTableException("Wrong type %s when expected number" % t_value) + + elif value_type == "string": + if isinstance(value, unicode): + return value + else: + return str(value).decode("utf-8") + + elif value_type == "date": + if isinstance(value, datetime.datetime): + return datetime.date(value.year, value.month, value.day) + elif isinstance(value, datetime.date): + return value + else: + raise DataTableException("Wrong type %s when expected date" % t_value) + + elif value_type == "timeofday": + if isinstance(value, datetime.datetime): + return datetime.time(value.hour, value.minute, value.second) + elif isinstance(value, datetime.time): + return value + else: + raise DataTableException("Wrong type %s when expected time" % t_value) + + elif value_type == "datetime": + if isinstance(value, datetime.datetime): + return value + else: + raise DataTableException("Wrong type %s when expected datetime" % + t_value) + # If we got here, it means the given value_type was not one of the + # supported types. 
+ raise DataTableException("Unsupported type %s" % value_type) + + @staticmethod + def EscapeForJSCode(encoder, value): + if value is None: + return "null" + elif isinstance(value, datetime.datetime): + if value.microsecond == 0: + # If it's not ms-resolution, leave that out to save space. + return "new Date(%d,%d,%d,%d,%d,%d)" % (value.year, + value.month - 1, # To match JS + value.day, + value.hour, + value.minute, + value.second) + else: + return "new Date(%d,%d,%d,%d,%d,%d,%d)" % (value.year, + value.month - 1, # match JS + value.day, + value.hour, + value.minute, + value.second, + value.microsecond / 1000) + elif isinstance(value, datetime.date): + return "new Date(%d,%d,%d)" % (value.year, value.month - 1, value.day) + else: + return encoder.encode(value) + + @staticmethod + def ToString(value): + if value is None: + return "(empty)" + elif isinstance(value, (datetime.datetime, + datetime.date, + datetime.time)): + return str(value) + elif isinstance(value, unicode): + return value + elif isinstance(value, bool): + return str(value).lower() + else: + return str(value).decode("utf-8") + + @staticmethod + def ColumnTypeParser(description): + """Parses a single column description. Internal helper method. + + Args: + description: a column description in the possible formats: + 'id' + ('id',) + ('id', 'type') + ('id', 'type', 'label') + ('id', 'type', 'label', {'custom_prop1': 'custom_val1'}) + Returns: + Dictionary with the following keys: id, label, type, and + custom_properties where: + - If label not given, it equals the id. + - If type not given, string is used by default. + - If custom properties are not given, an empty dictionary is used by + default. + + Raises: + DataTableException: The column description did not match the RE, or + unsupported type was passed. 
+ """ + if not description: + raise DataTableException("Description error: empty description given") + + if not isinstance(description, (types.StringTypes, tuple)): + raise DataTableException("Description error: expected either string or " + "tuple, got %s." % type(description)) + + if isinstance(description, types.StringTypes): + description = (description,) + + # According to the tuple's length, we fill the keys + # We verify everything is of type string + for elem in description[:3]: + if not isinstance(elem, types.StringTypes): + raise DataTableException("Description error: expected tuple of " + "strings, current element of type %s." % + type(elem)) + desc_dict = {"id": description[0], + "label": description[0], + "type": "string", + "custom_properties": {}} + if len(description) > 1: + desc_dict["type"] = description[1].lower() + if len(description) > 2: + desc_dict["label"] = description[2] + if len(description) > 3: + if not isinstance(description[3], dict): + raise DataTableException("Description error: expected custom " + "properties of type dict, current element " + "of type %s." % type(description[3])) + desc_dict["custom_properties"] = description[3] + if len(description) > 4: + raise DataTableException("Description error: tuple of length > 4") + if desc_dict["type"] not in ["string", "number", "boolean", + "date", "datetime", "timeofday"]: + raise DataTableException( + "Description error: unsupported type '%s'" % desc_dict["type"]) + return desc_dict + + @staticmethod + def TableDescriptionParser(table_description, depth=0): + """Parses the table_description object for internal use. + + Parses the user-submitted table description into an internal format used + by the Python DataTable class. Returns the flat list of parsed columns. + + Args: + table_description: A description of the table which should comply + with one of the formats described below. + depth: Optional. The depth of the first level in the current description. 
+ Used by recursive calls to this function. + + Returns: + List of columns, where each column represented by a dictionary with the + keys: id, label, type, depth, container which means the following: + - id: the id of the column + - name: The name of the column + - type: The datatype of the elements in this column. Allowed types are + described in ColumnTypeParser(). + - depth: The depth of this column in the table description + - container: 'dict', 'iter' or 'scalar' for parsing the format easily. + - custom_properties: The custom properties for this column. + The returned description is flattened regardless of how it was given. + + Raises: + DataTableException: Error in a column description or in the description + structure. + + Examples: + A column description can be of the following forms: + 'id' + ('id',) + ('id', 'type') + ('id', 'type', 'label') + ('id', 'type', 'label', {'custom_prop1': 'custom_val1'}) + or as a dictionary: + 'id': 'type' + 'id': ('type',) + 'id': ('type', 'label') + 'id': ('type', 'label', {'custom_prop1': 'custom_val1'}) + If the type is not specified, we treat it as string. + If no specific label is given, the label is simply the id. + If no custom properties are given, we use an empty dictionary. 
+ + input: [('a', 'date'), ('b', 'timeofday', 'b', {'foo': 'bar'})] + output: [{'id': 'a', 'label': 'a', 'type': 'date', + 'depth': 0, 'container': 'iter', 'custom_properties': {}}, + {'id': 'b', 'label': 'b', 'type': 'timeofday', + 'depth': 0, 'container': 'iter', + 'custom_properties': {'foo': 'bar'}}] + + input: {'a': [('b', 'number'), ('c', 'string', 'column c')]} + output: [{'id': 'a', 'label': 'a', 'type': 'string', + 'depth': 0, 'container': 'dict', 'custom_properties': {}}, + {'id': 'b', 'label': 'b', 'type': 'number', + 'depth': 1, 'container': 'iter', 'custom_properties': {}}, + {'id': 'c', 'label': 'column c', 'type': 'string', + 'depth': 1, 'container': 'iter', 'custom_properties': {}}] + + input: {('a', 'number', 'column a'): { 'b': 'number', 'c': 'string'}} + output: [{'id': 'a', 'label': 'column a', 'type': 'number', + 'depth': 0, 'container': 'dict', 'custom_properties': {}}, + {'id': 'b', 'label': 'b', 'type': 'number', + 'depth': 1, 'container': 'dict', 'custom_properties': {}}, + {'id': 'c', 'label': 'c', 'type': 'string', + 'depth': 1, 'container': 'dict', 'custom_properties': {}}] + + input: { ('w', 'string', 'word'): ('c', 'number', 'count') } + output: [{'id': 'w', 'label': 'word', 'type': 'string', + 'depth': 0, 'container': 'dict', 'custom_properties': {}}, + {'id': 'c', 'label': 'count', 'type': 'number', + 'depth': 1, 'container': 'scalar', 'custom_properties': {}}] + + input: {'a': ('number', 'column a'), 'b': ('string', 'column b')} + output: [{'id': 'a', 'label': 'column a', 'type': 'number', 'depth': 0, + 'container': 'dict', 'custom_properties': {}}, + {'id': 'b', 'label': 'column b', 'type': 'string', 'depth': 0, + 'container': 'dict', 'custom_properties': {}} + + NOTE: there might be ambiguity in the case of a dictionary representation + of a single column. 
For example, the following description can be parsed + in 2 different ways: {'a': ('b', 'c')} can be thought of a single column + with the id 'a', of type 'b' and the label 'c', or as 2 columns: one named + 'a', and the other named 'b' of type 'c'. We choose the first option by + default, and in case the second option is the right one, it is possible to + make the key into a tuple (i.e. {('a',): ('b', 'c')}) or add more info + into the tuple, thus making it look like this: {'a': ('b', 'c', 'b', {})} + -- second 'b' is the label, and {} is the custom properties field. + """ + # For the recursion step, we check for a scalar object (string or tuple) + if isinstance(table_description, (types.StringTypes, tuple)): + parsed_col = DataTable.ColumnTypeParser(table_description) + parsed_col["depth"] = depth + parsed_col["container"] = "scalar" + return [parsed_col] + + # Since it is not scalar, table_description must be iterable. + if not hasattr(table_description, "__iter__"): + raise DataTableException("Expected an iterable object, got %s" % + type(table_description)) + if not isinstance(table_description, dict): + # We expects a non-dictionary iterable item. 
+ columns = [] + for desc in table_description: + parsed_col = DataTable.ColumnTypeParser(desc) + parsed_col["depth"] = depth + parsed_col["container"] = "iter" + columns.append(parsed_col) + if not columns: + raise DataTableException("Description iterable objects should not" + " be empty.") + return columns + # The other case is a dictionary + if not table_description: + raise DataTableException("Empty dictionaries are not allowed inside" + " description") + + # To differentiate between the two cases of more levels below or this is + # the most inner dictionary, we consider the number of keys (more then one + # key is indication for most inner dictionary) and the type of the key and + # value in case of only 1 key (if the type of key is string and the type of + # the value is a tuple of 0-3 items, we assume this is the most inner + # dictionary). + # NOTE: this way of differentiating might create ambiguity. See docs. + if (len(table_description) != 1 or + (isinstance(table_description.keys()[0], types.StringTypes) and + isinstance(table_description.values()[0], tuple) and + len(table_description.values()[0]) < 4)): + # This is the most inner dictionary. Parsing types. + columns = [] + # We sort the items, equivalent to sort the keys since they are unique + for key, value in sorted(table_description.items()): + # We parse the column type as (key, type) or (key, type, label) using + # ColumnTypeParser. + if isinstance(value, tuple): + parsed_col = DataTable.ColumnTypeParser((key,) + value) + else: + parsed_col = DataTable.ColumnTypeParser((key, value)) + parsed_col["depth"] = depth + parsed_col["container"] = "dict" + columns.append(parsed_col) + return columns + # This is an outer dictionary, must have at most one key. 
+ parsed_col = DataTable.ColumnTypeParser(table_description.keys()[0]) + parsed_col["depth"] = depth + parsed_col["container"] = "dict" + return ([parsed_col] + + DataTable.TableDescriptionParser(table_description.values()[0], + depth=depth + 1)) + + @property + def columns(self): + """Returns the parsed table description.""" + return self.__columns + + def NumberOfRows(self): + """Returns the number of rows in the current data stored in the table.""" + return len(self.__data) + + def SetRowsCustomProperties(self, rows, custom_properties): + """Sets the custom properties for given row(s). + + Can accept a single row or an iterable of rows. + Sets the given custom properties for all specified rows. + + Args: + rows: The row, or rows, to set the custom properties for. + custom_properties: A string to string dictionary of custom properties to + set for all rows. + """ + if not hasattr(rows, "__iter__"): + rows = [rows] + for row in rows: + self.__data[row] = (self.__data[row][0], custom_properties) + + def LoadData(self, data, custom_properties=None): + """Loads new rows to the data table, clearing existing rows. + + May also set the custom_properties for the added rows. The given custom + properties dictionary specifies the dictionary that will be used for *all* + given rows. + + Args: + data: The rows that the table will contain. + custom_properties: A dictionary of string to string to set as the custom + properties for all rows. + """ + self.__data = [] + self.AppendData(data, custom_properties) + + def AppendData(self, data, custom_properties=None): + """Appends new data to the table. + + Data is appended in rows. Data must comply with + the table schema passed in to __init__(). See CoerceValue() for a list + of acceptable data types. See the class documentation for more information + and examples of schema and data values. + + Args: + data: The row to add to the table. The data must conform to the table + description format. 
+ custom_properties: A dictionary of string to string, representing the + custom properties to add to all the rows. + + Raises: + DataTableException: The data structure does not match the description. + """ + # If the maximal depth is 0, we simply iterate over the data table + # lines and insert them using _InnerAppendData. Otherwise, we simply + # let the _InnerAppendData handle all the levels. + if not self.__columns[-1]["depth"]: + for row in data: + self._InnerAppendData(({}, custom_properties), row, 0) + else: + self._InnerAppendData(({}, custom_properties), data, 0) + + def _InnerAppendData(self, prev_col_values, data, col_index): + """Inner function to assist LoadData.""" + # We first check that col_index has not exceeded the columns size + if col_index >= len(self.__columns): + raise DataTableException("The data does not match description, too deep") + + # Dealing with the scalar case, the data is the last value. + if self.__columns[col_index]["container"] == "scalar": + prev_col_values[0][self.__columns[col_index]["id"]] = data + self.__data.append(prev_col_values) + return + + if self.__columns[col_index]["container"] == "iter": + if not hasattr(data, "__iter__") or isinstance(data, dict): + raise DataTableException("Expected iterable object, got %s" % + type(data)) + # We only need to insert the rest of the columns + # If there are less items than expected, we only add what there is. + for value in data: + if col_index >= len(self.__columns): + raise DataTableException("Too many elements given in data") + prev_col_values[0][self.__columns[col_index]["id"]] = value + col_index += 1 + self.__data.append(prev_col_values) + return + + # We know the current level is a dictionary, we verify the type. 
+ if not isinstance(data, dict): + raise DataTableException("Expected dictionary at current level, got %s" % + type(data)) + # We check if this is the last level + if self.__columns[col_index]["depth"] == self.__columns[-1]["depth"]: + # We need to add the keys in the dictionary as they are + for col in self.__columns[col_index:]: + if col["id"] in data: + prev_col_values[0][col["id"]] = data[col["id"]] + self.__data.append(prev_col_values) + return + + # We have a dictionary in an inner depth level. + if not data.keys(): + # In case this is an empty dictionary, we add a record with the columns + # filled only until this point. + self.__data.append(prev_col_values) + else: + for key in sorted(data): + col_values = dict(prev_col_values[0]) + col_values[self.__columns[col_index]["id"]] = key + self._InnerAppendData((col_values, prev_col_values[1]), + data[key], col_index + 1) + + def _PreparedData(self, order_by=()): + """Prepares the data for enumeration - sorting it by order_by. + + Args: + order_by: Optional. Specifies the name of the column(s) to sort by, and + (optionally) which direction to sort in. Default sort direction + is asc. Following formats are accepted: + "string_col_name" -- For a single key in default (asc) order. + ("string_col_name", "asc|desc") -- For a single key. + [("col_1","asc|desc"), ("col_2","asc|desc")] -- For more than + one column, an array of tuples of (col_name, "asc|desc"). + + Returns: + The data sorted by the keys given. 
+ + Raises: + DataTableException: Sort direction not in 'asc' or 'desc' + """ + if not order_by: + return self.__data + + proper_sort_keys = [] + if isinstance(order_by, types.StringTypes) or ( + isinstance(order_by, tuple) and len(order_by) == 2 and + order_by[1].lower() in ["asc", "desc"]): + order_by = (order_by,) + for key in order_by: + if isinstance(key, types.StringTypes): + proper_sort_keys.append((key, 1)) + elif (isinstance(key, (list, tuple)) and len(key) == 2 and + key[1].lower() in ("asc", "desc")): + proper_sort_keys.append((key[0], key[1].lower() == "asc" and 1 or -1)) + else: + raise DataTableException("Expected tuple with second value: " + "'asc' or 'desc'") + + def SortCmpFunc(row1, row2): + """cmp function for sorted. Compares by keys and 'asc'/'desc' keywords.""" + for key, asc_mult in proper_sort_keys: + cmp_result = asc_mult * cmp(row1[0].get(key), row2[0].get(key)) + if cmp_result: + return cmp_result + return 0 + + return sorted(self.__data, cmp=SortCmpFunc) + + def ToJSCode(self, name, columns_order=None, order_by=()): + """Writes the data table as a JS code string. + + This method writes a string of JS code that can be run to + generate a DataTable with the specified data. Typically used for debugging + only. + + Args: + name: The name of the table. The name would be used as the DataTable's + variable name in the created JS code. + columns_order: Optional. Specifies the order of columns in the + output table. Specify a list of all column IDs in the order + in which you want the table created. + Note that you must list all column IDs in this parameter, + if you use it. + order_by: Optional. Specifies the name of the column(s) to sort by. + Passed as is to _PreparedData. + + Returns: + A string of JS code that, when run, generates a DataTable with the given + name and the data stored in the DataTable object. 
+ Example result: + "var tab1 = new google.visualization.DataTable(); + tab1.addColumn("string", "a", "a"); + tab1.addColumn("number", "b", "b"); + tab1.addColumn("boolean", "c", "c"); + tab1.addRows(10); + tab1.setCell(0, 0, "a"); + tab1.setCell(0, 1, 1, null, {"foo": "bar"}); + tab1.setCell(0, 2, true); + ... + tab1.setCell(9, 0, "c"); + tab1.setCell(9, 1, 3, "3$"); + tab1.setCell(9, 2, false);" + + Raises: + DataTableException: The data does not match the type. + """ + + encoder = DataTableJSONEncoder() + + if columns_order is None: + columns_order = [col["id"] for col in self.__columns] + col_dict = dict([(col["id"], col) for col in self.__columns]) + + # We first create the table with the given name + jscode = "var %s = new google.visualization.DataTable();\n" % name + if self.custom_properties: + jscode += "%s.setTableProperties(%s);\n" % ( + name, encoder.encode(self.custom_properties)) + + # We add the columns to the table + for i, col in enumerate(columns_order): + jscode += "%s.addColumn(%s, %s, %s);\n" % ( + name, + encoder.encode(col_dict[col]["type"]), + encoder.encode(col_dict[col]["label"]), + encoder.encode(col_dict[col]["id"])) + if col_dict[col]["custom_properties"]: + jscode += "%s.setColumnProperties(%d, %s);\n" % ( + name, i, encoder.encode(col_dict[col]["custom_properties"])) + jscode += "%s.addRows(%d);\n" % (name, len(self.__data)) + + # We now go over the data and add each row + for (i, (row, cp)) in enumerate(self._PreparedData(order_by)): + # We add all the elements of this row by their order + for (j, col) in enumerate(columns_order): + if col not in row or row[col] is None: + continue + value = self.CoerceValue(row[col], col_dict[col]["type"]) + if isinstance(value, tuple): + cell_cp = "" + if len(value) == 3: + cell_cp = ", %s" % encoder.encode(row[col][2]) + # We have a formatted value or custom property as well + jscode += ("%s.setCell(%d, %d, %s, %s%s);\n" % + (name, i, j, + self.EscapeForJSCode(encoder, value[0]), + 
self.EscapeForJSCode(encoder, value[1]), cell_cp)) + else: + jscode += "%s.setCell(%d, %d, %s);\n" % ( + name, i, j, self.EscapeForJSCode(encoder, value)) + if cp: + jscode += "%s.setRowProperties(%d, %s);\n" % ( + name, i, encoder.encode(cp)) + return jscode + + def ToHtml(self, columns_order=None, order_by=()): + """Writes the data table as an HTML table code string. + + Args: + columns_order: Optional. Specifies the order of columns in the + output table. Specify a list of all column IDs in the order + in which you want the table created. + Note that you must list all column IDs in this parameter, + if you use it. + order_by: Optional. Specifies the name of the column(s) to sort by. + Passed as is to _PreparedData. + + Returns: + An HTML table code string. + Example result (the result is without the newlines): + <html><body><table border="1"> + <thead><tr><th>a</th><th>b</th><th>c</th></tr></thead> + <tbody> + <tr><td>1</td><td>"z"</td><td>2</td></tr> + <tr><td>"3$"</td><td>"w"</td><td></td></tr> + </tbody> + </table></body></html> + + Raises: + DataTableException: The data does not match the type. 
+ """ + table_template = "<html><body><table border=\"1\">%s</table></body></html>" + columns_template = "<thead><tr>%s</tr></thead>" + rows_template = "<tbody>%s</tbody>" + row_template = "<tr>%s</tr>" + header_cell_template = "<th>%s</th>" + cell_template = "<td>%s</td>" + + if columns_order is None: + columns_order = [col["id"] for col in self.__columns] + col_dict = dict([(col["id"], col) for col in self.__columns]) + + columns_list = [] + for col in columns_order: + columns_list.append(header_cell_template % + cgi.escape(col_dict[col]["label"])) + columns_html = columns_template % "".join(columns_list) + + rows_list = [] + # We now go over the data and add each row + for row, unused_cp in self._PreparedData(order_by): + cells_list = [] + # We add all the elements of this row by their order + for col in columns_order: + # For empty string we want empty quotes (""). + value = "" + if col in row and row[col] is not None: + value = self.CoerceValue(row[col], col_dict[col]["type"]) + if isinstance(value, tuple): + # We have a formatted value and we're going to use it + cells_list.append(cell_template % cgi.escape(self.ToString(value[1]))) + else: + cells_list.append(cell_template % cgi.escape(self.ToString(value))) + rows_list.append(row_template % "".join(cells_list)) + rows_html = rows_template % "".join(rows_list) + + return table_template % (columns_html + rows_html) + + def ToCsv(self, columns_order=None, order_by=(), separator=","): + """Writes the data table as a CSV string. + + Output is encoded in UTF-8 because the Python "csv" module can't handle + Unicode properly according to its documentation. + + Args: + columns_order: Optional. Specifies the order of columns in the + output table. Specify a list of all column IDs in the order + in which you want the table created. + Note that you must list all column IDs in this parameter, + if you use it. + order_by: Optional. Specifies the name of the column(s) to sort by. + Passed as is to _PreparedData. 
+ separator: Optional. The separator to use between the values. + + Returns: + A CSV string representing the table. + Example result: + 'a','b','c' + 1,'z',2 + 3,'w','' + + Raises: + DataTableException: The data does not match the type. + """ + + csv_buffer = cStringIO.StringIO() + writer = csv.writer(csv_buffer, delimiter=separator) + + if columns_order is None: + columns_order = [col["id"] for col in self.__columns] + col_dict = dict([(col["id"], col) for col in self.__columns]) + + writer.writerow([col_dict[col]["label"].encode("utf-8") + for col in columns_order]) + + # We now go over the data and add each row + for row, unused_cp in self._PreparedData(order_by): + cells_list = [] + # We add all the elements of this row by their order + for col in columns_order: + value = "" + if col in row and row[col] is not None: + value = self.CoerceValue(row[col], col_dict[col]["type"]) + if isinstance(value, tuple): + # We have a formatted value. Using it only for date/time types. + if col_dict[col]["type"] in ["date", "datetime", "timeofday"]: + cells_list.append(self.ToString(value[1]).encode("utf-8")) + else: + cells_list.append(self.ToString(value[0]).encode("utf-8")) + else: + cells_list.append(self.ToString(value).encode("utf-8")) + writer.writerow(cells_list) + return csv_buffer.getvalue() + + def ToTsvExcel(self, columns_order=None, order_by=()): + """Returns a file in tab-separated-format readable by MS Excel. + + Returns a file in UTF-16 little endian encoding, with tabs separating the + values. + + Args: + columns_order: Delegated to ToCsv. + order_by: Delegated to ToCsv. + + Returns: + A tab-separated little endian UTF16 file representing the table. + """ + return (self.ToCsv(columns_order, order_by, separator="\t") + .decode("utf-8").encode("UTF-16LE")) + + def _ToJSonObj(self, columns_order=None, order_by=()): + """Returns an object suitable to be converted to JSON. + + Args: + columns_order: Optional. 
A list of all column IDs in the order in which + you want them created in the output table. If specified, + all column IDs must be present. + order_by: Optional. Specifies the name of the column(s) to sort by. + Passed as is to _PreparedData(). + + Returns: + A dictionary object for use by ToJSon or ToJSonResponse. + """ + if columns_order is None: + columns_order = [col["id"] for col in self.__columns] + col_dict = dict([(col["id"], col) for col in self.__columns]) + + # Creating the column JSON objects + col_objs = [] + for col_id in columns_order: + col_obj = {"id": col_dict[col_id]["id"], + "label": col_dict[col_id]["label"], + "type": col_dict[col_id]["type"]} + if col_dict[col_id]["custom_properties"]: + col_obj["p"] = col_dict[col_id]["custom_properties"] + col_objs.append(col_obj) + + # Creating the rows jsons + row_objs = [] + for row, cp in self._PreparedData(order_by): + cell_objs = [] + for col in columns_order: + value = self.CoerceValue(row.get(col, None), col_dict[col]["type"]) + if value is None: + cell_obj = None + elif isinstance(value, tuple): + cell_obj = {"v": value[0]} + if len(value) > 1 and value[1] is not None: + cell_obj["f"] = value[1] + if len(value) == 3: + cell_obj["p"] = value[2] + else: + cell_obj = {"v": value} + cell_objs.append(cell_obj) + row_obj = {"c": cell_objs} + if cp: + row_obj["p"] = cp + row_objs.append(row_obj) + + json_obj = {"cols": col_objs, "rows": row_objs} + if self.custom_properties: + json_obj["p"] = self.custom_properties + + return json_obj + + def ToJSon(self, columns_order=None, order_by=()): + """Returns a string that can be used in a JS DataTable constructor. + + This method writes a JSON string that can be passed directly into a Google + Visualization API DataTable constructor. Use this output if you are + hosting the visualization HTML on your site, and want to code the data + table in Python. Pass this string into the + google.visualization.DataTable constructor, e.g,: + ... 
on my page that hosts my visualization ... + google.setOnLoadCallback(drawTable); + function drawTable() { + var data = new google.visualization.DataTable(_my_JSon_string, 0.6); + myTable.draw(data); + } + + Args: + columns_order: Optional. Specifies the order of columns in the + output table. Specify a list of all column IDs in the order + in which you want the table created. + Note that you must list all column IDs in this parameter, + if you use it. + order_by: Optional. Specifies the name of the column(s) to sort by. + Passed as is to _PreparedData(). + + Returns: + A JSon constructor string to generate a JS DataTable with the data + stored in the DataTable object. + Example result (the result is without the newlines): + {cols: [{id:"a",label:"a",type:"number"}, + {id:"b",label:"b",type:"string"}, + {id:"c",label:"c",type:"number"}], + rows: [{c:[{v:1},{v:"z"},{v:2}]}, c:{[{v:3,f:"3$"},{v:"w"},{v:null}]}], + p: {'foo': 'bar'}} + + Raises: + DataTableException: The data does not match the type. + """ + + encoder = DataTableJSONEncoder() + return encoder.encode( + self._ToJSonObj(columns_order, order_by)).encode("utf-8") + + def ToJSonResponse(self, columns_order=None, order_by=(), req_id=0, + response_handler="google.visualization.Query.setResponse"): + """Writes a table as a JSON response that can be returned as-is to a client. + + This method writes a JSON response to return to a client in response to a + Google Visualization API query. This string can be processed by the calling + page, and is used to deliver a data table to a visualization hosted on + a different page. + + Args: + columns_order: Optional. Passed straight to self.ToJSon(). + order_by: Optional. Passed straight to self.ToJSon(). + req_id: Optional. The response id, as retrieved by the request. + response_handler: Optional. The response handler, as retrieved by the + request. + + Returns: + A JSON response string to be received by JS the visualization Query + object. 
This response would be translated into a DataTable on the + client side. + Example result (newlines added for readability): + google.visualization.Query.setResponse({ + 'version':'0.6', 'reqId':'0', 'status':'OK', + 'table': {cols: [...], rows: [...]}}); + + Note: The URL returning this string can be used as a data source by Google + Visualization Gadgets or from JS code. + """ + + response_obj = { + "version": "0.6", + "reqId": str(req_id), + "table": self._ToJSonObj(columns_order, order_by), + "status": "ok" + } + encoder = DataTableJSONEncoder() + return "%s(%s);" % (response_handler, + encoder.encode(response_obj).encode("utf-8")) + + def ToResponse(self, columns_order=None, order_by=(), tqx=""): + """Writes the right response according to the request string passed in tqx. + + This method parses the tqx request string (format of which is defined in + the documentation for implementing a data source of Google Visualization), + and returns the right response according to the request. + It parses out the "out" parameter of tqx, calls the relevant response + (ToJSonResponse() for "json", ToCsv() for "csv", ToHtml() for "html", + ToTsvExcel() for "tsv-excel") and passes the response function the rest of + the relevant request keys. + + Args: + columns_order: Optional. Passed as is to the relevant response function. + order_by: Optional. Passed as is to the relevant response function. + tqx: Optional. The request string as received by HTTP GET. Should be in + the format "key1:value1;key2:value2...". All keys have a default + value, so an empty string will just do the default (which is calling + ToJSonResponse() with no extra parameters). + + Returns: + A response string, as returned by the relevant response function. + + Raises: + DataTableException: One of the parameters passed in tqx is not supported. 
+ """ + tqx_dict = {} + if tqx: + tqx_dict = dict(opt.split(":") for opt in tqx.split(";")) + if tqx_dict.get("version", "0.6") != "0.6": + raise DataTableException( + "Version (%s) passed by request is not supported." + % tqx_dict["version"]) + + if tqx_dict.get("out", "json") == "json": + response_handler = tqx_dict.get("responseHandler", + "google.visualization.Query.setResponse") + return self.ToJSonResponse(columns_order, order_by, + req_id=tqx_dict.get("reqId", 0), + response_handler=response_handler) + elif tqx_dict["out"] == "html": + return self.ToHtml(columns_order, order_by) + elif tqx_dict["out"] == "csv": + return self.ToCsv(columns_order, order_by) + elif tqx_dict["out"] == "tsv-excel": + return self.ToTsvExcel(columns_order, order_by) + else: + raise DataTableException( + "'out' parameter: '%s' is not supported" % tqx_dict["out"]) diff --git a/third_party/aom/test/hadamard_test.cc b/third_party/aom/test/hadamard_test.cc new file mode 100644 index 0000000000..b01e78faaa --- /dev/null +++ b/third_party/aom/test/hadamard_test.cc @@ -0,0 +1,547 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <algorithm> +#include <ostream> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +namespace { + +using libaom_test::ACMRandom; + +using HadamardFunc = void (*)(const int16_t *a, ptrdiff_t a_stride, + tran_low_t *b); +// Low precision version of Hadamard Transform +using HadamardLPFunc = void (*)(const int16_t *a, ptrdiff_t a_stride, + int16_t *b); +// Low precision version of Hadamard Transform 8x8 - Dual +using HadamardLP8x8DualFunc = void (*)(const int16_t *a, ptrdiff_t a_stride, + int16_t *b); + +template <typename OutputType> +void Hadamard4x4(const OutputType *a, OutputType *out) { + OutputType b[8]; + for (int i = 0; i < 4; i += 2) { + b[i + 0] = (a[i * 4] + a[(i + 1) * 4]) >> 1; + b[i + 1] = (a[i * 4] - a[(i + 1) * 4]) >> 1; + } + + out[0] = b[0] + b[2]; + out[1] = b[1] + b[3]; + out[2] = b[0] - b[2]; + out[3] = b[1] - b[3]; +} + +template <typename OutputType> +void ReferenceHadamard4x4(const int16_t *a, int a_stride, OutputType *b) { + OutputType input[16]; + OutputType buf[16]; + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + input[i * 4 + j] = static_cast<OutputType>(a[i * a_stride + j]); + } + } + for (int i = 0; i < 4; ++i) Hadamard4x4(input + i, buf + i * 4); + for (int i = 0; i < 4; ++i) Hadamard4x4(buf + i, b + i * 4); + + // Extra transpose to match C and SSE2 behavior(i.e., aom_hadamard_4x4). 
+ for (int i = 0; i < 4; i++) { + for (int j = i + 1; j < 4; j++) { + OutputType temp = b[j * 4 + i]; + b[j * 4 + i] = b[i * 4 + j]; + b[i * 4 + j] = temp; + } + } +} + +template <typename OutputType> +void HadamardLoop(const OutputType *a, OutputType *out) { + OutputType b[8]; + for (int i = 0; i < 8; i += 2) { + b[i + 0] = a[i * 8] + a[(i + 1) * 8]; + b[i + 1] = a[i * 8] - a[(i + 1) * 8]; + } + OutputType c[8]; + for (int i = 0; i < 8; i += 4) { + c[i + 0] = b[i + 0] + b[i + 2]; + c[i + 1] = b[i + 1] + b[i + 3]; + c[i + 2] = b[i + 0] - b[i + 2]; + c[i + 3] = b[i + 1] - b[i + 3]; + } + out[0] = c[0] + c[4]; + out[7] = c[1] + c[5]; + out[3] = c[2] + c[6]; + out[4] = c[3] + c[7]; + out[2] = c[0] - c[4]; + out[6] = c[1] - c[5]; + out[1] = c[2] - c[6]; + out[5] = c[3] - c[7]; +} + +template <typename OutputType> +void ReferenceHadamard8x8(const int16_t *a, int a_stride, OutputType *b) { + OutputType input[64]; + OutputType buf[64]; + for (int i = 0; i < 8; ++i) { + for (int j = 0; j < 8; ++j) { + input[i * 8 + j] = static_cast<OutputType>(a[i * a_stride + j]); + } + } + for (int i = 0; i < 8; ++i) HadamardLoop(input + i, buf + i * 8); + for (int i = 0; i < 8; ++i) HadamardLoop(buf + i, b + i * 8); + + // Extra transpose to match SSE2 behavior (i.e., aom_hadamard_8x8 and + // aom_hadamard_lp_8x8). + for (int i = 0; i < 8; i++) { + for (int j = i + 1; j < 8; j++) { + OutputType temp = b[j * 8 + i]; + b[j * 8 + i] = b[i * 8 + j]; + b[i * 8 + j] = temp; + } + } +} + +template <typename OutputType> +void ReferenceHadamard8x8Dual(const int16_t *a, int a_stride, OutputType *b) { + /* The source is a 8x16 block. The destination is rearranged to 8x16. + * Input is 9 bit. */ + ReferenceHadamard8x8(a, a_stride, b); + ReferenceHadamard8x8(a + 8, a_stride, b + 64); +} + +template <typename OutputType> +void ReferenceHadamard16x16(const int16_t *a, int a_stride, OutputType *b, + bool shift) { + /* The source is a 16x16 block. The destination is rearranged to 8x32. 
+ * Input is 9 bit. */ + ReferenceHadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0); + ReferenceHadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64); + ReferenceHadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128); + ReferenceHadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192); + + /* Overlay the 8x8 blocks and combine. */ + for (int i = 0; i < 64; ++i) { + /* 8x8 steps the range up to 15 bits. */ + const OutputType a0 = b[0]; + const OutputType a1 = b[64]; + const OutputType a2 = b[128]; + const OutputType a3 = b[192]; + + /* Prevent the result from escaping int16_t. */ + const OutputType b0 = (a0 + a1) >> 1; + const OutputType b1 = (a0 - a1) >> 1; + const OutputType b2 = (a2 + a3) >> 1; + const OutputType b3 = (a2 - a3) >> 1; + + /* Store a 16 bit value. */ + b[0] = b0 + b2; + b[64] = b1 + b3; + b[128] = b0 - b2; + b[192] = b1 - b3; + + ++b; + } + + if (shift) { + b -= 64; + // Extra shift to match aom_hadamard_16x16_c and aom_hadamard_16x16_avx2. + for (int i = 0; i < 16; i++) { + for (int j = 0; j < 4; j++) { + OutputType temp = b[i * 16 + 4 + j]; + b[i * 16 + 4 + j] = b[i * 16 + 8 + j]; + b[i * 16 + 8 + j] = temp; + } + } + } +} + +template <typename OutputType> +void ReferenceHadamard32x32(const int16_t *a, int a_stride, OutputType *b, + bool shift) { + ReferenceHadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0, shift); + ReferenceHadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256, shift); + ReferenceHadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512, shift); + ReferenceHadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768, shift); + + for (int i = 0; i < 256; ++i) { + const OutputType a0 = b[0]; + const OutputType a1 = b[256]; + const OutputType a2 = b[512]; + const OutputType a3 = b[768]; + + const OutputType b0 = (a0 + a1) >> 2; + const OutputType b1 = (a0 - a1) >> 2; + const OutputType b2 = (a2 + a3) >> 2; + const OutputType b3 = (a2 - a3) >> 2; + + b[0] = b0 + b2; + b[256] = b1 + b3; + b[512] = b0 - b2; + b[768] = b1 - b3; + + ++b; + } +} + 
+template <typename OutputType> +void ReferenceHadamard(const int16_t *a, int a_stride, OutputType *b, int bw, + int bh, bool shift) { + if (bw == 32 && bh == 32) { + ReferenceHadamard32x32(a, a_stride, b, shift); + } else if (bw == 16 && bh == 16) { + ReferenceHadamard16x16(a, a_stride, b, shift); + } else if (bw == 8 && bh == 8) { + ReferenceHadamard8x8(a, a_stride, b); + } else if (bw == 4 && bh == 4) { + ReferenceHadamard4x4(a, a_stride, b); + } else if (bw == 8 && bh == 16) { + ReferenceHadamard8x8Dual(a, a_stride, b); + } else { + GTEST_FAIL() << "Invalid Hadamard transform size " << bw << bh << std::endl; + } +} + +template <typename HadamardFuncType> +struct FuncWithSize { + FuncWithSize(HadamardFuncType f, int bw, int bh) + : func(f), block_width(bw), block_height(bh) {} + HadamardFuncType func; + int block_width; + int block_height; +}; + +using HadamardFuncWithSize = FuncWithSize<HadamardFunc>; +using HadamardLPFuncWithSize = FuncWithSize<HadamardLPFunc>; +using HadamardLP8x8DualFuncWithSize = FuncWithSize<HadamardLP8x8DualFunc>; + +template <typename OutputType, typename HadamardFuncType> +class HadamardTestBase + : public ::testing::TestWithParam<FuncWithSize<HadamardFuncType>> { + public: + HadamardTestBase(const FuncWithSize<HadamardFuncType> &func_param, + bool do_shift) { + h_func_ = func_param.func; + bw_ = func_param.block_width; + bh_ = func_param.block_height; + shift_ = do_shift; + } + + void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); } + + // The Rand() function generates values in the range [-((1 << BitDepth) - 1), + // (1 << BitDepth) - 1]. This is because the input to the Hadamard transform + // is the residual pixel, which is defined as 'source pixel - predicted + // pixel'. Source pixel and predicted pixel take values in the range + // [0, (1 << BitDepth) - 1] and thus the residual pixel ranges from + // -((1 << BitDepth) - 1) to ((1 << BitDepth) - 1). 
+ virtual int16_t Rand() = 0; + + void CompareReferenceRandom() { + const int kMaxBlockSize = 32 * 32; + const int block_size = bw_ * bh_; + + DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]); + DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]); + memset(a, 0, sizeof(a)); + memset(b, 0, sizeof(b)); + + OutputType b_ref[kMaxBlockSize]; + memset(b_ref, 0, sizeof(b_ref)); + + for (int i = 0; i < block_size; ++i) a[i] = Rand(); + ReferenceHadamard(a, bw_, b_ref, bw_, bh_, shift_); + API_REGISTER_STATE_CHECK(h_func_(a, bw_, b)); + + // The order of the output is not important. Sort before checking. + std::sort(b, b + block_size); + std::sort(b_ref, b_ref + block_size); + EXPECT_EQ(memcmp(b, b_ref, sizeof(b)), 0); + } + + void CompareReferenceExtreme() { + const int kMaxBlockSize = 32 * 32; + const int block_size = bw_ * bh_; + const int kBitDepth = 8; + DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]); + DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]); + memset(b, 0, sizeof(b)); + + OutputType b_ref[kMaxBlockSize]; + memset(b_ref, 0, sizeof(b_ref)); + for (int i = 0; i < 2; ++i) { + const int sign = (i == 0) ? 1 : -1; + for (int j = 0; j < block_size; ++j) a[j] = sign * ((1 << kBitDepth) - 1); + + ReferenceHadamard(a, bw_, b_ref, bw_, bh_, shift_); + API_REGISTER_STATE_CHECK(h_func_(a, bw_, b)); + + // The order of the output is not important. Sort before checking. 
+ std::sort(b, b + block_size); + std::sort(b_ref, b_ref + block_size); + EXPECT_EQ(memcmp(b, b_ref, sizeof(b)), 0); + } + } + + void VaryStride() { + const int kMaxBlockSize = 32 * 32; + const int block_size = bw_ * bh_; + + DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]); + DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]); + memset(a, 0, sizeof(a)); + for (int i = 0; i < block_size * 8; ++i) a[i] = Rand(); + + OutputType b_ref[kMaxBlockSize]; + for (int i = 8; i < 64; i += 8) { + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + ReferenceHadamard(a, i, b_ref, bw_, bh_, shift_); + API_REGISTER_STATE_CHECK(h_func_(a, i, b)); + + // The order of the output is not important. Sort before checking. + std::sort(b, b + block_size); + std::sort(b_ref, b_ref + block_size); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); + } + } + + void SpeedTest(int times) { + const int kMaxBlockSize = 32 * 32; + DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]); + DECLARE_ALIGNED(16, OutputType, output[kMaxBlockSize]); + memset(input, 1, sizeof(input)); + memset(output, 0, sizeof(output)); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < times; ++i) { + h_func_(input, bw_, output); + } + aom_usec_timer_mark(&timer); + + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + printf("Hadamard%dx%d[%12d runs]: %d us\n", bw_, bh_, times, elapsed_time); + } + + protected: + ACMRandom rnd_; + + private: + HadamardFuncType h_func_; + int bw_; + int bh_; + bool shift_; +}; + +class HadamardLowbdTest : public HadamardTestBase<tran_low_t, HadamardFunc> { + public: + HadamardLowbdTest() : HadamardTestBase(GetParam(), /*do_shift=*/true) {} + // Use values between -255 (0xFF01) and 255 (0x00FF) + int16_t Rand() override { + int16_t src = rnd_.Rand8(); + int16_t pred = rnd_.Rand8(); + return src - pred; + } +}; + +TEST_P(HadamardLowbdTest, CompareReferenceRandom) { CompareReferenceRandom(); } + +TEST_P(HadamardLowbdTest, 
CompareReferenceExtreme) { + CompareReferenceExtreme(); +} + +TEST_P(HadamardLowbdTest, VaryStride) { VaryStride(); } + +TEST_P(HadamardLowbdTest, DISABLED_SpeedTest) { SpeedTest(1000000); } + +INSTANTIATE_TEST_SUITE_P( + C, HadamardLowbdTest, + ::testing::Values(HadamardFuncWithSize(&aom_hadamard_4x4_c, 4, 4), + HadamardFuncWithSize(&aom_hadamard_8x8_c, 8, 8), + HadamardFuncWithSize(&aom_hadamard_16x16_c, 16, 16), + HadamardFuncWithSize(&aom_hadamard_32x32_c, 32, 32))); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, HadamardLowbdTest, + ::testing::Values(HadamardFuncWithSize(&aom_hadamard_4x4_sse2, 4, 4), + HadamardFuncWithSize(&aom_hadamard_8x8_sse2, 8, 8), + HadamardFuncWithSize(&aom_hadamard_16x16_sse2, 16, 16), + HadamardFuncWithSize(&aom_hadamard_32x32_sse2, 32, 32))); +#endif // HAVE_SSE2 + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, HadamardLowbdTest, + ::testing::Values(HadamardFuncWithSize(&aom_hadamard_16x16_avx2, 16, 16), + HadamardFuncWithSize(&aom_hadamard_32x32_avx2, 32, 32))); +#endif // HAVE_AVX2 + +// TODO(aomedia:3314): Disable NEON unit test for now, since hadamard 16x16 NEON +// need modifications to match C/AVX2 behavior. 
+#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, HadamardLowbdTest, + ::testing::Values(HadamardFuncWithSize(&aom_hadamard_4x4_neon, 4, 4), + HadamardFuncWithSize(&aom_hadamard_8x8_neon, 8, 8), + HadamardFuncWithSize(&aom_hadamard_16x16_neon, 16, 16), + HadamardFuncWithSize(&aom_hadamard_32x32_neon, 32, 32))); +#endif // HAVE_NEON + +#if CONFIG_AV1_HIGHBITDEPTH +class HadamardHighbdTest : public HadamardTestBase<tran_low_t, HadamardFunc> { + protected: + HadamardHighbdTest() : HadamardTestBase(GetParam(), /*do_shift=*/true) {} + // Use values between -4095 (0xF001) and 4095 (0x0FFF) + int16_t Rand() override { + int16_t src = rnd_.Rand12(); + int16_t pred = rnd_.Rand12(); + return src - pred; + } +}; + +TEST_P(HadamardHighbdTest, CompareReferenceRandom) { CompareReferenceRandom(); } + +TEST_P(HadamardHighbdTest, VaryStride) { VaryStride(); } + +TEST_P(HadamardHighbdTest, DISABLED_Speed) { + SpeedTest(10); + SpeedTest(10000); + SpeedTest(10000000); +} + +INSTANTIATE_TEST_SUITE_P( + C, HadamardHighbdTest, + ::testing::Values( + HadamardFuncWithSize(&aom_highbd_hadamard_8x8_c, 8, 8), + HadamardFuncWithSize(&aom_highbd_hadamard_16x16_c, 16, 16), + HadamardFuncWithSize(&aom_highbd_hadamard_32x32_c, 32, 32))); + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, HadamardHighbdTest, + ::testing::Values( + HadamardFuncWithSize(&aom_highbd_hadamard_8x8_avx2, 8, 8), + HadamardFuncWithSize(&aom_highbd_hadamard_16x16_avx2, 16, 16), + HadamardFuncWithSize(&aom_highbd_hadamard_32x32_avx2, 32, 32))); +#endif // HAVE_AVX2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, HadamardHighbdTest, + ::testing::Values( + HadamardFuncWithSize(&aom_highbd_hadamard_8x8_neon, 8, 8), + HadamardFuncWithSize(&aom_highbd_hadamard_16x16_neon, 16, 16), + HadamardFuncWithSize(&aom_highbd_hadamard_32x32_neon, 32, 32))); +#endif // HAVE_NEON + +#endif // CONFIG_AV1_HIGHBITDEPTH + +// Tests for low precision +class HadamardLowbdLPTest : public HadamardTestBase<int16_t, HadamardLPFunc> { + public: + 
HadamardLowbdLPTest() : HadamardTestBase(GetParam(), /*do_shift=*/false) {} + // Use values between -255 (0xFF01) and 255 (0x00FF) + int16_t Rand() override { + int16_t src = rnd_.Rand8(); + int16_t pred = rnd_.Rand8(); + return src - pred; + } +}; + +TEST_P(HadamardLowbdLPTest, CompareReferenceRandom) { + CompareReferenceRandom(); +} + +TEST_P(HadamardLowbdLPTest, VaryStride) { VaryStride(); } + +TEST_P(HadamardLowbdLPTest, DISABLED_SpeedTest) { SpeedTest(1000000); } + +INSTANTIATE_TEST_SUITE_P( + C, HadamardLowbdLPTest, + ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_c, 8, 8), + HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_c, 16, + 16))); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, HadamardLowbdLPTest, + ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_sse2, 8, 8), + HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_sse2, 16, + 16))); +#endif // HAVE_SSE2 + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, HadamardLowbdLPTest, + ::testing::Values(HadamardLPFuncWithSize( + &aom_hadamard_lp_16x16_avx2, 16, 16))); +#endif // HAVE_AVX2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, HadamardLowbdLPTest, + ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_neon, 8, 8), + HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_neon, 16, + 16))); +#endif // HAVE_NEON + +// Tests for 8x8 dual low precision +class HadamardLowbdLP8x8DualTest + : public HadamardTestBase<int16_t, HadamardLP8x8DualFunc> { + public: + HadamardLowbdLP8x8DualTest() + : HadamardTestBase(GetParam(), /*do_shift=*/false) {} + // Use values between -255 (0xFF01) and 255 (0x00FF) + int16_t Rand() override { + int16_t src = rnd_.Rand8(); + int16_t pred = rnd_.Rand8(); + return src - pred; + } +}; + +TEST_P(HadamardLowbdLP8x8DualTest, CompareReferenceRandom) { + CompareReferenceRandom(); +} + +TEST_P(HadamardLowbdLP8x8DualTest, VaryStride) { VaryStride(); } + +TEST_P(HadamardLowbdLP8x8DualTest, DISABLED_SpeedTest) { SpeedTest(1000000); } + +INSTANTIATE_TEST_SUITE_P(C, 
HadamardLowbdLP8x8DualTest, + ::testing::Values(HadamardLP8x8DualFuncWithSize( + &aom_hadamard_lp_8x8_dual_c, 8, 16))); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, HadamardLowbdLP8x8DualTest, + ::testing::Values(HadamardLP8x8DualFuncWithSize( + &aom_hadamard_lp_8x8_dual_sse2, 8, 16))); +#endif // HAVE_SSE2 + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, HadamardLowbdLP8x8DualTest, + ::testing::Values(HadamardLP8x8DualFuncWithSize( + &aom_hadamard_lp_8x8_dual_avx2, 8, 16))); +#endif // HAVE_AVX2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, HadamardLowbdLP8x8DualTest, + ::testing::Values(HadamardLP8x8DualFuncWithSize( + &aom_hadamard_lp_8x8_dual_neon, 8, 16))); +#endif // HAVE_NEON + +} // namespace diff --git a/third_party/aom/test/hash_test.cc b/third_party/aom/test/hash_test.cc new file mode 100644 index 0000000000..a1de9323db --- /dev/null +++ b/third_party/aom/test/hash_test.cc @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <cstdlib> +#include <new> +#include <tuple> + +#include "config/aom_config.h" +#include "config/av1_rtcd.h" + +#include "aom_ports/aom_timer.h" +#include "av1/encoder/hash.h" +#include "test/acm_random.h" +#include "test/util.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +typedef uint32_t (*get_crc32c_value_func)(void *calculator, uint8_t *p, + size_t length); + +typedef std::tuple<get_crc32c_value_func, int> HashParam; + +class AV1Crc32cHashTest : public ::testing::TestWithParam<HashParam> { + public: + ~AV1Crc32cHashTest() override; + void SetUp() override; + + void TearDown() override; + + protected: + void RunCheckOutput(get_crc32c_value_func test_impl); + void RunSpeedTest(get_crc32c_value_func test_impl); + + void RunZeroTest(get_crc32c_value_func test_impl); + + libaom_test::ACMRandom rnd_; + CRC32C calc_; + uint8_t *buffer_; + int bsize_; + size_t length_; +}; + +AV1Crc32cHashTest::~AV1Crc32cHashTest() = default; + +void AV1Crc32cHashTest::SetUp() { + rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); + av1_crc32c_calculator_init(&calc_); + + bsize_ = GET_PARAM(1); + length_ = bsize_ * bsize_ * sizeof(uint16_t); + buffer_ = new uint8_t[length_]; + ASSERT_NE(buffer_, nullptr); + for (size_t i = 0; i < length_; ++i) { + buffer_[i] = rnd_.Rand8(); + } +} + +void AV1Crc32cHashTest::TearDown() { delete[] buffer_; } + +void AV1Crc32cHashTest::RunCheckOutput(get_crc32c_value_func test_impl) { + get_crc32c_value_func ref_impl = av1_get_crc32c_value_c; + // for the same buffer crc should be the same + uint32_t crc0 = test_impl(&calc_, buffer_, length_); + uint32_t crc1 = test_impl(&calc_, buffer_, length_); + uint32_t crc2 = ref_impl(&calc_, buffer_, length_); + ASSERT_EQ(crc0, crc1); + ASSERT_EQ(crc0, crc2); // should equal to software version + // modify buffer + buffer_[0] += 1; + uint32_t crc3 = test_impl(&calc_, buffer_, length_); + uint32_t crc4 = ref_impl(&calc_, buffer_, length_); + 
  ASSERT_NE(crc0, crc3);  // crc should not equal the previous one
a/third_party/aom/test/hbd_metrics_test.cc b/third_party/aom/test/hbd_metrics_test.cc new file mode 100644 index 0000000000..303d580c4a --- /dev/null +++ b/third_party/aom/test/hbd_metrics_test.cc @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <math.h> +#include <stdlib.h> +#include <new> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/util.h" + +#include "config/aom_config.h" + +#include "aom_dsp/psnr.h" +#include "aom_dsp/ssim.h" +#include "aom_ports/mem.h" +#include "aom_ports/msvc.h" +#include "aom_scale/yv12config.h" + +using libaom_test::ACMRandom; + +namespace { + +typedef double (*LBDMetricFunc)(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest); +typedef double (*HBDMetricFunc)(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, uint32_t in_bd, + uint32_t bd); + +double compute_hbd_psnr(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, uint32_t in_bd, + uint32_t bd) { + PSNR_STATS psnr; + aom_calc_highbd_psnr(source, dest, &psnr, bd, in_bd); + return psnr.psnr[0]; +} + +double compute_psnr(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest) { + PSNR_STATS psnr; + aom_calc_psnr(source, dest, &psnr); + return psnr.psnr[0]; +} + +double compute_hbd_psnrhvs(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, uint32_t in_bd, + uint32_t bd) { + double tempy, tempu, tempv; + return 
// Shared logic for comparing a low bit-depth quality metric against its high
// bit-depth counterpart. Derived fixtures fill in the metric function
// pointers, bit depths and tolerance before calling RunAccuracyCheck().
class HBDMetricsTestBase {
 public:
  virtual ~HBDMetricsTestBase() = default;

 protected:
  // Builds matching 8-bit and high bit-depth source/destination frames three
  // times, each with a different amount of distortion in the destination, and
  // expects |lbd_metric_ - hbd_metric_| <= threshold_ every time.
  void RunAccuracyCheck() {
    const int width = 1920;
    const int height = 1080;
    size_t i = 0;
    const uint8_t kPixFiller = 128;
    YV12_BUFFER_CONFIG lbd_src, lbd_dst;
    YV12_BUFFER_CONFIG hbd_src, hbd_dst;
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    double lbd_db, hbd_db;

    memset(&lbd_src, 0, sizeof(lbd_src));
    memset(&lbd_dst, 0, sizeof(lbd_dst));
    memset(&hbd_src, 0, sizeof(hbd_src));
    memset(&hbd_dst, 0, sizeof(hbd_dst));

    // The lbd and hbd calls differ only in the sixth argument (0 vs 1) --
    // presumably the high bit-depth flag; confirm against the
    // aom_alloc_frame_buffer() declaration.
    // NOTE(review): the return values are not checked here.
    aom_alloc_frame_buffer(&lbd_src, width, height, 1, 1, 0, 32, 16, 0, 0);
    aom_alloc_frame_buffer(&lbd_dst, width, height, 1, 1, 0, 32, 16, 0, 0);
    aom_alloc_frame_buffer(&hbd_src, width, height, 1, 1, 1, 32, 16, 0, 0);
    aom_alloc_frame_buffer(&hbd_dst, width, height, 1, 1, 1, 32, 16, 0, 0);

    // Pass 1: constant source (kPixFiller), fully random destination. Each
    // 8-bit value is mirrored into the hbd buffers shifted left by
    // (bit_depth_ - 8). lbd_src.buffer_alloc_sz is used as the element count
    // for the uint16_t writes as well -- assumes the hbd allocations hold at
    // least that many 16-bit samples; TODO confirm.
    memset(lbd_src.buffer_alloc, kPixFiller, lbd_src.buffer_alloc_sz);
    while (i < lbd_src.buffer_alloc_sz) {
      uint16_t spel, dpel;
      spel = lbd_src.buffer_alloc[i];
      // Create some distortion for dst buffer.
      dpel = rnd.Rand8();
      lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
      ((uint16_t *)(hbd_src.buffer_alloc))[i] = spel << (bit_depth_ - 8);
      ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
      i++;
    }

    lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
    hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
    EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);

    // Pass 2: destination is 120 plus the top four bits of a random byte; the
    // source buffers are left unchanged.
    i = 0;
    while (i < lbd_src.buffer_alloc_sz) {
      uint16_t dpel;
      // Create some small distortion for dst buffer.
      dpel = 120 + (rnd.Rand8() >> 4);
      lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
      ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
      i++;
    }

    lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
    hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
    EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);

    // Pass 3: destination is 126 plus the top two bits of a random byte.
    i = 0;
    while (i < lbd_src.buffer_alloc_sz) {
      uint16_t dpel;
      // Create some small distortion for dst buffer.
      dpel = 126 + (rnd.Rand8() >> 6);
      lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
      ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
      i++;
    }

    lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
    hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
    EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);

    aom_free_frame_buffer(&lbd_src);
    aom_free_frame_buffer(&lbd_dst);
    aom_free_frame_buffer(&hbd_src);
    aom_free_frame_buffer(&hbd_dst);
  }

  // Passed to hbd_metric_ as its third argument (in_bd).
  int input_bit_depth_;
  // Passed to hbd_metric_ as its fourth argument (bd); also determines the
  // left shift (bit_depth_ - 8) applied when filling the hbd buffers.
  int bit_depth_;
  // Maximum allowed absolute difference between the two metric values.
  double threshold_;
  // Metric evaluated on the 8-bit frames.
  LBDMetricFunc lbd_metric_;
  // Metric evaluated on the high bit-depth frames.
  HBDMetricFunc hbd_metric_;
};
+static const double kPhvs_thresh = 0.3; + +INSTANTIATE_TEST_SUITE_P( + AOMSSIM, HBDMetricsTest, + ::testing::Values(MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim, + 8, 10, kSsim_thresh), + MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim, + 10, 10, kPhvs_thresh), + MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim, + 8, 12, kSsim_thresh), + MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim, + 12, 12, kPhvs_thresh))); +INSTANTIATE_TEST_SUITE_P( + FASTSSIM, HBDMetricsTest, + ::testing::Values(MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, + 8, 10, kFSsim_thresh), + MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, + 10, 10, kFSsim_thresh), + MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, + 8, 12, kFSsim_thresh), + MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, + 12, 12, kFSsim_thresh))); +INSTANTIATE_TEST_SUITE_P( + PSNRHVS, HBDMetricsTest, + ::testing::Values(MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs, + 8, 10, kPhvs_thresh), + MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs, + 10, 10, kPhvs_thresh), + MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs, + 8, 12, kPhvs_thresh), + MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs, + 12, 12, kPhvs_thresh))); +INSTANTIATE_TEST_SUITE_P( + PSNR, HBDMetricsTest, + ::testing::Values( + MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 8, 10, kPhvs_thresh), + MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 10, 10, + kPhvs_thresh), + MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 8, 12, kPhvs_thresh), + MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 12, 12, + kPhvs_thresh))); +} // namespace diff --git a/third_party/aom/test/hiprec_convolve_test.cc b/third_party/aom/test/hiprec_convolve_test.cc new file mode 100644 index 0000000000..78883ccddf --- /dev/null +++ b/third_party/aom/test/hiprec_convolve_test.cc @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/hiprec_convolve_test_util.h" + +using libaom_test::ACMRandom; +#if CONFIG_AV1_HIGHBITDEPTH +using libaom_test::AV1HighbdHiprecConvolve::AV1HighbdHiprecConvolveTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdHiprecConvolveTest); +#endif +using libaom_test::AV1HiprecConvolve::AV1HiprecConvolveTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HiprecConvolveTest); +using std::make_tuple; +using std::tuple; + +namespace { + +TEST_P(AV1HiprecConvolveTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); } +TEST_P(AV1HiprecConvolveTest, DISABLED_SpeedTest) { + RunSpeedTest(GET_PARAM(3)); +} +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, AV1HiprecConvolveTest, + libaom_test::AV1HiprecConvolve::BuildParams( + av1_wiener_convolve_add_src_sse2)); +#endif +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1HiprecConvolveTest, + libaom_test::AV1HiprecConvolve::BuildParams( + av1_wiener_convolve_add_src_avx2)); +#endif +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1HiprecConvolveTest, + libaom_test::AV1HiprecConvolve::BuildParams( + av1_wiener_convolve_add_src_neon)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +#if HAVE_SSSE3 || HAVE_AVX2 || HAVE_NEON +TEST_P(AV1HighbdHiprecConvolveTest, CheckOutput) { + RunCheckOutput(GET_PARAM(4)); +} +TEST_P(AV1HighbdHiprecConvolveTest, DISABLED_SpeedTest) { + RunSpeedTest(GET_PARAM(4)); +} +#if HAVE_SSSE3 +INSTANTIATE_TEST_SUITE_P(SSSE3, 
AV1HighbdHiprecConvolveTest, + libaom_test::AV1HighbdHiprecConvolve::BuildParams( + av1_highbd_wiener_convolve_add_src_ssse3)); +#endif +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, AV1HighbdHiprecConvolveTest, + libaom_test::AV1HighbdHiprecConvolve::BuildParams( + av1_highbd_wiener_convolve_add_src_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdHiprecConvolveTest, + libaom_test::AV1HighbdHiprecConvolve::BuildParams( + av1_highbd_wiener_convolve_add_src_neon)); +#endif +#endif +#endif // CONFIG_AV1_HIGHBITDEPTH + +} // namespace diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc new file mode 100644 index 0000000000..6d7902fd04 --- /dev/null +++ b/third_party/aom/test/hiprec_convolve_test_util.cc @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
// Generate a pair of symmetric Wiener filter kernels (horizontal and
// vertical), using the ranges of possible values from the loop-restoration
// experiment.
//
//   rnd         - random source; only used by the random kernel types.
//   hkernel     - receives the horizontal kernel (8 entries, entry 7 is 0).
//   vkernel     - receives the vertical kernel (8 entries, entry 7 is 0).
//   kernel_type - 0: minimum taps, 7-tap    3: minimum taps, 5-tap
//                 1: maximum taps, 7-tap    4: maximum taps, 5-tap
//                 2: random taps,  7-tap    any other value: random, 5-tap
//
// Taps are mirrored around index 3, and the center tap is set to
// -2 * (tap0 + tap1 + tap2) so that the seven taps sum to zero. The 5-tap
// variants force taps 0 and 6 to zero.
//
// For the random types the horizontal taps are drawn before the vertical
// ones, in the order tap0, tap1, tap2; changing this order changes the values
// produced for a given seed.
static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
                             InterpKernel vkernel, int kernel_type = 2) {
  if (kernel_type == 0) {
    // Low possible values for filter coefficients, 7-tap kernel
    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MINV;
    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
    hkernel[7] = vkernel[7] = 0;
  } else if (kernel_type == 1) {
    // Max possible values for filter coefficients, 7-tap kernel
    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MAXV;
    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
    hkernel[7] = vkernel[7] = 0;
  } else if (kernel_type == 2) {
    // Randomly generated values for filter coefficients, 7-tap kernel
    hkernel[0] = hkernel[6] =
        WIENER_FILT_TAP0_MINV +
        rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
    hkernel[1] = hkernel[5] =
        WIENER_FILT_TAP1_MINV +
        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
    hkernel[2] = hkernel[4] =
        WIENER_FILT_TAP2_MINV +
        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
    hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
    hkernel[7] = 0;

    // NOTE(review): the vertical draws use "MAXV + 2 - MINV" where the
    // horizontal draws above use "MAXV + 1 - MINV". If PseudoUniform(n)
    // returns a value in [0, n), the vertical taps can reach MAXV + 1 --
    // confirm whether that is intentional.
    vkernel[0] = vkernel[6] =
        WIENER_FILT_TAP0_MINV +
        rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 2 - WIENER_FILT_TAP0_MINV);
    vkernel[1] = vkernel[5] =
        WIENER_FILT_TAP1_MINV +
        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
    vkernel[2] = vkernel[4] =
        WIENER_FILT_TAP2_MINV +
        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
    vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
    vkernel[7] = 0;
  } else if (kernel_type == 3) {
    // Low possible values for filter coefficients, 5-tap kernel
    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0;
    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
    hkernel[7] = vkernel[7] = 0;
  } else if (kernel_type == 4) {
    // Max possible values for filter coefficients, 5-tap kernel
    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0;
    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
    hkernel[7] = vkernel[7] = 0;
  } else {
    // Randomly generated values for filter coefficients, 5-tap kernel
    hkernel[0] = hkernel[6] = 0;
    hkernel[1] = hkernel[5] =
        WIENER_FILT_TAP1_MINV +
        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
    hkernel[2] = hkernel[4] =
        WIENER_FILT_TAP2_MINV +
        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
    hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
    hkernel[7] = 0;

    // NOTE(review): same "+ 2" versus "+ 1" range difference as in the 7-tap
    // random case above -- confirm.
    vkernel[0] = vkernel[6] = 0;
    vkernel[1] = vkernel[5] =
        WIENER_FILT_TAP1_MINV +
        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
    vkernel[2] = vkernel[4] =
        WIENER_FILT_TAP2_MINV +
        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
    vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
    vkernel[7] = 0;
  }
}
+::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams( + hiprec_convolve_func filter) { + const HiprecConvolveParam params[] = { + make_tuple(8, 8, 50000, filter), make_tuple(8, 4, 50000, filter), + make_tuple(64, 24, 1000, filter), make_tuple(64, 64, 1000, filter), + make_tuple(64, 56, 1000, filter), make_tuple(32, 8, 10000, filter), + make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter), + make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter), + make_tuple(64, 34, 1000, filter), make_tuple(8, 17, 10000, filter), + make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter) + }; + return ::testing::ValuesIn(params); +} + +AV1HiprecConvolveTest::~AV1HiprecConvolveTest() = default; +void AV1HiprecConvolveTest::SetUp() { + rnd_.Reset(ACMRandom::DeterministicSeed()); +} + +void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) { + const int w = 128, h = 128; + const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); + const int num_iters = GET_PARAM(2); + int i, j, k, m; + const WienerConvolveParams conv_params = get_conv_params_wiener(8); + + std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]); + ASSERT_NE(input_, nullptr); + uint8_t *input = input_.get(); + + // The AVX2 convolve functions always write rows with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. 
+ int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; + std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]); + ASSERT_NE(output, nullptr); + std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]); + ASSERT_NE(output2, nullptr); + + // Generate random filter kernels + DECLARE_ALIGNED(16, InterpKernel, hkernel); + DECLARE_ALIGNED(16, InterpKernel, vkernel); + + for (int kernel_type = 0; kernel_type < 6; kernel_type++) { + generate_kernels(&rnd_, hkernel, vkernel, kernel_type); + for (i = 0; i < num_iters; ++i) { + for (k = 0; k < h; ++k) + for (m = 0; m < w; ++m) input[k * w + m] = rnd_.Rand8(); + // Choose random locations within the source block + int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7); + int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7); + av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w, + output.get(), out_w, hkernel, 16, vkernel, + 16, out_w, out_h, &conv_params); + test_impl(input + offset_r * w + offset_c, w, output2.get(), out_w, + hkernel, 16, vkernel, 16, out_w, out_h, &conv_params); + + for (j = 0; j < out_w * out_h; ++j) + ASSERT_EQ(output[j], output2[j]) + << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", " + << (j / out_w) << ") on iteration " << i; + } + } +} + +void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) { + const int w = 128, h = 128; + const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); + const int num_iters = GET_PARAM(2) / 500; + int i, j, k; + const WienerConvolveParams conv_params = get_conv_params_wiener(8); + + std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]); + ASSERT_NE(input_, nullptr); + uint8_t *input = input_.get(); + + // The AVX2 convolve functions always write rows with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. 
+ int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; + std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]); + ASSERT_NE(output, nullptr); + std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]); + ASSERT_NE(output2, nullptr); + + // Generate random filter kernels + DECLARE_ALIGNED(16, InterpKernel, hkernel); + DECLARE_ALIGNED(16, InterpKernel, vkernel); + + generate_kernels(&rnd_, hkernel, vkernel); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8(); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + av1_wiener_convolve_add_src_c(input + j * w + k, w, output.get(), out_w, + hkernel, 16, vkernel, 16, out_w, out_h, + &conv_params); + } + } + } + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + test_impl(input + j * w + k, w, output2.get(), out_w, hkernel, 16, + vkernel, 16, out_w, out_h, &conv_params); + } + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; +} +} // namespace AV1HiprecConvolve + +#if CONFIG_AV1_HIGHBITDEPTH +namespace AV1HighbdHiprecConvolve { + +::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams( + highbd_hiprec_convolve_func filter) { + const HighbdHiprecConvolveParam params[] = { + make_tuple(8, 8, 50000, 8, filter), make_tuple(64, 64, 1000, 8, 
filter), + make_tuple(32, 8, 10000, 8, filter), make_tuple(8, 8, 50000, 10, filter), + make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter), + make_tuple(8, 8, 50000, 12, filter), make_tuple(64, 64, 1000, 12, filter), + make_tuple(32, 8, 10000, 12, filter), + }; + return ::testing::ValuesIn(params); +} + +AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() = default; +void AV1HighbdHiprecConvolveTest::SetUp() { + rnd_.Reset(ACMRandom::DeterministicSeed()); +} + +void AV1HighbdHiprecConvolveTest::RunCheckOutput( + highbd_hiprec_convolve_func test_impl) { + const int w = 128, h = 128; + const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); + const int num_iters = GET_PARAM(2); + const int bd = GET_PARAM(3); + int i, j; + const WienerConvolveParams conv_params = get_conv_params_wiener(bd); + + std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]); + ASSERT_NE(input, nullptr); + + // The AVX2 convolve functions always write rows with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. 
+ int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; + std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]); + ASSERT_NE(output, nullptr); + std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]); + ASSERT_NE(output2, nullptr); + + // Generate random filter kernels + DECLARE_ALIGNED(16, InterpKernel, hkernel); + DECLARE_ALIGNED(16, InterpKernel, vkernel); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + + uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get()); + uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get()); + uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get()); + for (int kernel_type = 0; kernel_type < 6; kernel_type++) { + generate_kernels(&rnd_, hkernel, vkernel, kernel_type); + for (i = 0; i < num_iters; ++i) { + // Choose random locations within the source block + int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7); + int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7); + av1_highbd_wiener_convolve_add_src_c( + input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, + 16, vkernel, 16, out_w, out_h, &conv_params, bd); + test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w, + hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd); + + for (j = 0; j < out_w * out_h; ++j) + ASSERT_EQ(output[j], output2[j]) + << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", " + << (j / out_w) << ") on iteration " << i; + } + } +} + +void AV1HighbdHiprecConvolveTest::RunSpeedTest( + highbd_hiprec_convolve_func test_impl) { + const int w = 128, h = 128; + const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); + const int num_iters = GET_PARAM(2) / 500; + const int bd = GET_PARAM(3); + int i, j, k; + const WienerConvolveParams conv_params = get_conv_params_wiener(bd); + + std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]); + ASSERT_NE(input, nullptr); + + // The AVX2 convolve functions always write rows 
with widths that are + // multiples of 16. So to avoid a buffer overflow, we may need to pad + // rows to a multiple of 16. + int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; + std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]); + ASSERT_NE(output, nullptr); + std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]); + ASSERT_NE(output2, nullptr); + + // Generate random filter kernels + DECLARE_ALIGNED(16, InterpKernel, hkernel); + DECLARE_ALIGNED(16, InterpKernel, vkernel); + + generate_kernels(&rnd_, hkernel, vkernel); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); + + uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get()); + uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get()); + uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get()); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + av1_highbd_wiener_convolve_add_src_c( + input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel, + 16, out_w, out_h, &conv_params, bd); + } + } + } + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < num_iters; ++i) { + for (j = 3; j < h - out_h - 4; j++) { + for (k = 3; k < w - out_w - 4; k++) { + test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16, + vkernel, 16, out_w, out_h, &conv_params, bd); + } + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " 
us\n"; +} +} // namespace AV1HighbdHiprecConvolve +#endif // CONFIG_AV1_HIGHBITDEPTH +} // namespace libaom_test diff --git a/third_party/aom/test/hiprec_convolve_test_util.h b/third_party/aom/test/hiprec_convolve_test_util.h new file mode 100644 index 0000000000..beae5c729b --- /dev/null +++ b/third_party/aom/test/hiprec_convolve_test_util.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AOM_TEST_HIPREC_CONVOLVE_TEST_UTIL_H_ +#define AOM_TEST_HIPREC_CONVOLVE_TEST_UTIL_H_ + +#include <tuple> + +#include "config/av1_rtcd.h" + +#include "test/acm_random.h" +#include "test/util.h" +#include "test/register_state_check.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "aom_ports/aom_timer.h" +#include "av1/common/convolve.h" +#include "av1/common/mv.h" + +namespace libaom_test { + +namespace AV1HiprecConvolve { + +typedef void (*hiprec_convolve_func)(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h, + const WienerConvolveParams *conv_params); + +typedef std::tuple<int, int, int, hiprec_convolve_func> HiprecConvolveParam; + +::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams( + hiprec_convolve_func filter); + +class AV1HiprecConvolveTest + : public ::testing::TestWithParam<HiprecConvolveParam> { + public: + ~AV1HiprecConvolveTest() override; + void SetUp() override; + + 
protected: + void RunCheckOutput(hiprec_convolve_func test_impl); + void RunSpeedTest(hiprec_convolve_func test_impl); + + libaom_test::ACMRandom rnd_; +}; + +} // namespace AV1HiprecConvolve + +#if CONFIG_AV1_HIGHBITDEPTH +namespace AV1HighbdHiprecConvolve { +typedef void (*highbd_hiprec_convolve_func)( + const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, + ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, int w, int h, + const WienerConvolveParams *conv_params, int bps); + +typedef std::tuple<int, int, int, int, highbd_hiprec_convolve_func> + HighbdHiprecConvolveParam; + +::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams( + highbd_hiprec_convolve_func filter); + +class AV1HighbdHiprecConvolveTest + : public ::testing::TestWithParam<HighbdHiprecConvolveParam> { + public: + ~AV1HighbdHiprecConvolveTest() override; + void SetUp() override; + + protected: + void RunCheckOutput(highbd_hiprec_convolve_func test_impl); + void RunSpeedTest(highbd_hiprec_convolve_func test_impl); + + libaom_test::ACMRandom rnd_; +}; + +} // namespace AV1HighbdHiprecConvolve +#endif // CONFIG_AV1_HIGHBITDEPTH +} // namespace libaom_test + +#endif // AOM_TEST_HIPREC_CONVOLVE_TEST_UTIL_H_ diff --git a/third_party/aom/test/horver_correlation_test.cc b/third_party/aom/test/horver_correlation_test.cc new file mode 100644 index 0000000000..5e397ffdf7 --- /dev/null +++ b/third_party/aom/test/horver_correlation_test.cc @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" +#include "config/av1_rtcd.h" + +#include "aom/aom_integer.h" + +using libaom_test::ACMRandom; + +namespace { +typedef void (*HorverFunc)(const int16_t *diff, int stride, int w, int h, + float *hcorr, float *vcorr); + +typedef std::tuple<const HorverFunc> HorverTestParam; + +class HorverTest : public ::testing::TestWithParam<HorverTestParam> { + public: + void SetUp() override { + data_buf_ = (int16_t *)aom_malloc(MAX_SB_SQUARE * sizeof(int16_t)); + ASSERT_NE(data_buf_, nullptr); + target_func_ = GET_PARAM(0); + } + void TearDown() override { aom_free(data_buf_); } + void RunHorverTest(); + void RunHorverTest_ExtremeValues(); + void RunHorverSpeedTest(int run_times); + + private: + HorverFunc target_func_; + ACMRandom rng_; + int16_t *data_buf_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HorverTest); + +void HorverTest::RunHorverTest() { + for (int block_size = 0; block_size < BLOCK_SIZES_ALL; block_size++) { + const int w = block_size_wide[block_size]; + const int h = block_size_high[block_size]; + for (int iter = 0; iter < 1000 && !HasFatalFailure(); ++iter) { + float hcorr_ref = 0.0, vcorr_ref = 0.0; + float hcorr_test = 0.0, vcorr_test = 0.0; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + data_buf_[i] = (rng_.Rand16() % (1 << 12)) - (1 << 11); + } + + av1_get_horver_correlation_full_c(data_buf_, MAX_SB_SIZE, w, h, + &hcorr_ref, &vcorr_ref); + + target_func_(data_buf_, MAX_SB_SIZE, w, h, &hcorr_test, &vcorr_test); + + ASSERT_LE(fabs(hcorr_ref - hcorr_test), 1e-6) + << "hcorr incorrect (" << w << "x" << h << ")"; + 
ASSERT_LE(fabs(vcorr_ref - vcorr_test), 1e-6) + << "vcorr incorrect (" << w << "x" << h << ")"; + } + // printf("(%3dx%-3d) passed\n", w, h); + } +} + +void HorverTest::RunHorverSpeedTest(int run_times) { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + data_buf_[i] = rng_.Rand16() % (1 << 12); + } + + for (int block_size = 0; block_size < BLOCK_SIZES_ALL; block_size++) { + const int w = block_size_wide[block_size]; + const int h = block_size_high[block_size]; + float hcorr_ref = 0.0, vcorr_ref = 0.0; + float hcorr_test = 0.0, vcorr_test = 0.0; + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + av1_get_horver_correlation_full_c(data_buf_, MAX_SB_SIZE, w, h, + &hcorr_ref, &vcorr_ref); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + target_func_(data_buf_, MAX_SB_SIZE, w, h, &hcorr_test, &vcorr_test); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf("%3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", w, h, time1, time2, + time1 / time2); + } +} + +void HorverTest::RunHorverTest_ExtremeValues() { + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + // Most of get_horver_test is squaring and summing, so simply saturating + // the whole buffer is mostly likely to cause an overflow. 
+ data_buf_[i] = (1 << 12) - 1; + } + + for (int block_size = 0; block_size < BLOCK_SIZES_ALL; block_size++) { + const int w = block_size_wide[block_size]; + const int h = block_size_high[block_size]; + float hcorr_ref = 0.0, vcorr_ref = 0.0; + float hcorr_test = 0.0, vcorr_test = 0.0; + + av1_get_horver_correlation_full_c(data_buf_, MAX_SB_SIZE, w, h, &hcorr_ref, + &vcorr_ref); + target_func_(data_buf_, MAX_SB_SIZE, w, h, &hcorr_test, &vcorr_test); + + ASSERT_LE(fabs(hcorr_ref - hcorr_test), 1e-6) << "hcorr incorrect"; + ASSERT_LE(fabs(vcorr_ref - vcorr_test), 1e-6) << "vcorr incorrect"; + } +} + +TEST_P(HorverTest, RandomValues) { RunHorverTest(); } + +TEST_P(HorverTest, ExtremeValues) { RunHorverTest_ExtremeValues(); } + +TEST_P(HorverTest, DISABLED_Speed) { RunHorverSpeedTest(100000); } + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, HorverTest, + ::testing::Values(av1_get_horver_correlation_full_sse4_1)); +#endif // HAVE_SSE4_1 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, HorverTest, ::testing::Values(av1_get_horver_correlation_full_neon)); +#endif // HAVE_NEON + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, HorverTest, ::testing::Values(av1_get_horver_correlation_full_avx2)); +#endif // HAVE_AVX2 + +} // namespace diff --git a/third_party/aom/test/horz_superres_test.cc b/third_party/aom/test/horz_superres_test.cc new file mode 100644 index 0000000000..595ed548c7 --- /dev/null +++ b/third_party/aom/test/horz_superres_test.cc @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <memory> +#include <ostream> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "av1/encoder/encoder.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" + +namespace { + +using std::make_tuple; +using std::tuple; + +/* TESTING PARAMETERS */ + +const int kBitrate = 40; + +typedef struct { + const char *filename; + aom_img_fmt fmt; + aom_bit_depth_t bit_depth; + unsigned int profile; + unsigned int limit; + unsigned int screen_content; + double psnr_threshold; // used by modes other than AOM_SUPERRES_AUTO + double psnr_threshold2; // used by AOM_SUPERRES_AUTO +} TestVideoParam; + +std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) { + return os << "TestVideoParam { filename:" << test_arg.filename + << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth + << " profile:" << test_arg.profile << " limit:" << test_arg.limit + << " screen_content:" << test_arg.screen_content + << " psnr_threshold:" << test_arg.psnr_threshold << " }"; +} + +const TestVideoParam kTestVideoVectors[] = { + { "park_joy_90p_8_420.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 5, 0, 25.3, + 44.7 }, +#if CONFIG_AV1_HIGHBITDEPTH + { "park_joy_90p_10_444.y4m", AOM_IMG_FMT_I44416, AOM_BITS_10, 1, 5, 0, 27.0, + 46.8 }, +#endif + { "screendata.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 4, 1, 23.0, 52.5 }, + // Image coding (single frame). + { "niklas_1280_720_30.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 1, 0, 32.0, + 49.0 }, +}; + +// Modes with extra params have their own tests. 
+const aom_superres_mode kSuperresModesWithoutParams[] = { AOM_SUPERRES_RANDOM, + AOM_SUPERRES_AUTO }; + +// Superres denominators and superres kf denominators to be tested +typedef tuple<int, int> SuperresDenominatorPair; +const SuperresDenominatorPair kSuperresDenominators[] = { + make_tuple(16, 9), make_tuple(13, 11), make_tuple(9, 9), + make_tuple(13, 13), make_tuple(11, 16), make_tuple(8, 16), + make_tuple(16, 8), make_tuple(8, 8), make_tuple(9, 14), +}; + +// Superres q thresholds and superres kf q thresholds to be tested +typedef tuple<int, int> SuperresQThresholdPair; +const SuperresQThresholdPair kSuperresQThresholds[] = { + make_tuple(63, 63), make_tuple(63, 41), make_tuple(17, 63), + make_tuple(41, 11), make_tuple(1, 37), make_tuple(11, 11), + make_tuple(1, 1), make_tuple(17, 29), make_tuple(29, 11), +}; + +/* END (TESTING PARAMETERS) */ + +// Test parameter list: +// <[needed for EncoderTest], test_video_param_, superres_mode_> +typedef tuple<const libaom_test::CodecFactory *, TestVideoParam, + aom_superres_mode> + HorzSuperresTestParam; + +class HorzSuperresEndToEndTest + : public ::testing::TestWithParam<HorzSuperresTestParam>, + public ::libaom_test::EncoderTest { + protected: + HorzSuperresEndToEndTest() + : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)), + superres_mode_(GET_PARAM(2)), psnr_(0.0), frame_count_(0) {} + + ~HorzSuperresEndToEndTest() override = default; + + void SetUp() override { + InitializeConfig(::libaom_test::kTwoPassGood); + cfg_.g_lag_in_frames = 5; + cfg_.rc_end_usage = AOM_Q; + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = (unsigned int)test_video_param_.bit_depth; + cfg_.g_bit_depth = test_video_param_.bit_depth; + init_flags_ = AOM_CODEC_USE_PSNR; + if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH; + + // Set superres parameters + cfg_.rc_superres_mode = superres_mode_; + } + + void 
BeginPassHook(unsigned int) override { + psnr_ = 0.0; + frame_count_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + frame_count_++; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + encoder->Control(AV1E_SET_TILE_COLUMNS, 4); + + // Set cpu-used = 8 for speed + encoder->Control(AOME_SET_CPUUSED, 8); + + // Test screen coding tools + if (test_video_param_.screen_content) + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN); + else + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT); + + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + + double GetAveragePsnr() const { + if (frame_count_) return psnr_ / frame_count_; + return 0.0; + } + + void DoTest() { + std::unique_ptr<libaom_test::VideoSource> video; + video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0, + test_video_param_.limit)); + ASSERT_NE(video, nullptr); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + const double psnr_thresh = (superres_mode_ == AOM_SUPERRES_AUTO) + ? 
test_video_param_.psnr_threshold2 + : test_video_param_.psnr_threshold; + const double psnr = GetAveragePsnr(); + EXPECT_GT(psnr, psnr_thresh); + + EXPECT_EQ(test_video_param_.limit, frame_count_); + } + + TestVideoParam test_video_param_; + aom_superres_mode superres_mode_; + + private: + double psnr_; + unsigned int frame_count_; +}; + +TEST_P(HorzSuperresEndToEndTest, HorzSuperresEndToEndPSNRTest) { DoTest(); } + +AV1_INSTANTIATE_TEST_SUITE(HorzSuperresEndToEndTest, + ::testing::ValuesIn(kTestVideoVectors), + ::testing::ValuesIn(kSuperresModesWithoutParams)); + +// Test parameter list: +// <[needed for EncoderTest], test_video_param_, tuple(superres_denom_, +// superres_kf_denom_)> +typedef tuple<const libaom_test::CodecFactory *, TestVideoParam, + SuperresDenominatorPair> + HorzSuperresFixedTestParam; + +class HorzSuperresFixedEndToEndTest + : public ::testing::TestWithParam<HorzSuperresFixedTestParam>, + public ::libaom_test::EncoderTest { + protected: + HorzSuperresFixedEndToEndTest() + : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)), + superres_mode_(AOM_SUPERRES_FIXED), psnr_(0.0), frame_count_(0) { + SuperresDenominatorPair denoms = GET_PARAM(2); + superres_denom_ = std::get<0>(denoms); + superres_kf_denom_ = std::get<1>(denoms); + } + + ~HorzSuperresFixedEndToEndTest() override = default; + + void SetUp() override { + InitializeConfig(::libaom_test::kTwoPassGood); + cfg_.g_lag_in_frames = 5; + cfg_.rc_end_usage = AOM_VBR; + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = (unsigned int)test_video_param_.bit_depth; + cfg_.g_bit_depth = test_video_param_.bit_depth; + init_flags_ = AOM_CODEC_USE_PSNR; + if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH; + + // Set superres parameters + cfg_.rc_superres_mode = superres_mode_; + cfg_.rc_superres_denominator = superres_denom_; + cfg_.rc_superres_kf_denominator = superres_kf_denom_; + } + + 
void BeginPassHook(unsigned int) override { + psnr_ = 0.0; + frame_count_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + frame_count_++; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + encoder->Control(AV1E_SET_TILE_COLUMNS, 4); + + // Set cpu-used = 8 for speed + encoder->Control(AOME_SET_CPUUSED, 8); + + // Test screen coding tools + if (test_video_param_.screen_content) + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN); + else + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT); + + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + + double GetAveragePsnr() const { + if (frame_count_) return psnr_ / frame_count_; + return 0.0; + } + + void DoTest() { + std::unique_ptr<libaom_test::VideoSource> video; + video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0, + test_video_param_.limit)); + ASSERT_NE(video, nullptr); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + const double psnr = GetAveragePsnr(); + EXPECT_GT(psnr, test_video_param_.psnr_threshold) + << "superres_mode_ = " << superres_mode_ + << ", superres_denom_ = " << superres_denom_ + << ", superres_kf_denom_ = " << superres_kf_denom_; + + EXPECT_EQ(test_video_param_.limit, frame_count_) + << "superres_mode_ = " << superres_mode_ + << ", superres_denom_ = " << superres_denom_ + << ", superres_kf_denom_ = " << superres_kf_denom_; + } + + TestVideoParam test_video_param_; + aom_superres_mode superres_mode_; + int superres_denom_; + int superres_kf_denom_; + + private: + double psnr_; + unsigned int frame_count_; +}; + +TEST_P(HorzSuperresFixedEndToEndTest, HorzSuperresFixedTestParam) { DoTest(); } + 
+AV1_INSTANTIATE_TEST_SUITE(HorzSuperresFixedEndToEndTest, + ::testing::ValuesIn(kTestVideoVectors), + ::testing::ValuesIn(kSuperresDenominators)); + +// Test parameter list: +// <[needed for EncoderTest], test_video_param_, +// tuple(superres_qthresh_,superres_kf_qthresh_)> +typedef tuple<const libaom_test::CodecFactory *, TestVideoParam, + SuperresQThresholdPair> + HorzSuperresQThreshTestParam; + +class HorzSuperresQThreshEndToEndTest + : public ::testing::TestWithParam<HorzSuperresQThreshTestParam>, + public ::libaom_test::EncoderTest { + protected: + HorzSuperresQThreshEndToEndTest() + : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)), + superres_mode_(AOM_SUPERRES_QTHRESH), psnr_(0.0), frame_count_(0) { + SuperresQThresholdPair qthresholds = GET_PARAM(2); + superres_qthresh_ = std::get<0>(qthresholds); + superres_kf_qthresh_ = std::get<1>(qthresholds); + } + + ~HorzSuperresQThreshEndToEndTest() override = default; + + void SetUp() override { + InitializeConfig(::libaom_test::kTwoPassGood); + cfg_.g_lag_in_frames = 5; + cfg_.rc_end_usage = AOM_VBR; + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = (unsigned int)test_video_param_.bit_depth; + cfg_.g_bit_depth = test_video_param_.bit_depth; + init_flags_ = AOM_CODEC_USE_PSNR; + if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH; + + // Set superres parameters + cfg_.rc_superres_mode = superres_mode_; + cfg_.rc_superres_qthresh = superres_qthresh_; + cfg_.rc_superres_kf_qthresh = superres_kf_qthresh_; + } + + void BeginPassHook(unsigned int) override { + psnr_ = 0.0; + frame_count_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + frame_count_++; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 
1); + encoder->Control(AV1E_SET_TILE_COLUMNS, 0); + + // Set cpu-used = 8 for speed + encoder->Control(AOME_SET_CPUUSED, 8); + + // Test screen coding tools + if (test_video_param_.screen_content) + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN); + else + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT); + + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + + double GetAveragePsnr() const { + if (frame_count_) return psnr_ / frame_count_; + return 0.0; + } + + void DoTest() { + std::unique_ptr<libaom_test::VideoSource> video; + video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0, + test_video_param_.limit)); + ASSERT_NE(video, nullptr); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + const double psnr = GetAveragePsnr(); + EXPECT_GT(psnr, test_video_param_.psnr_threshold) + << "superres_mode_ = " << superres_mode_ + << ", superres_qthresh_ = " << superres_qthresh_ + << ", superres_kf_qthresh_ = " << superres_kf_qthresh_; + + EXPECT_EQ(test_video_param_.limit, frame_count_) + << "superres_mode_ = " << superres_mode_ + << ", superres_qthresh_ = " << superres_qthresh_ + << ", superres_kf_qthresh_ = " << superres_kf_qthresh_; + } + + TestVideoParam test_video_param_; + aom_superres_mode superres_mode_; + int superres_qthresh_; + int superres_kf_qthresh_; + + private: + double psnr_; + unsigned int frame_count_; +}; + +TEST_P(HorzSuperresQThreshEndToEndTest, HorzSuperresQThreshEndToEndPSNRTest) { + DoTest(); +} + +AV1_INSTANTIATE_TEST_SUITE(HorzSuperresQThreshEndToEndTest, + ::testing::ValuesIn(kTestVideoVectors), + ::testing::ValuesIn(kSuperresQThresholds)); + +} // namespace diff --git a/third_party/aom/test/i420_video_source.h b/third_party/aom/test/i420_video_source.h new file mode 100644 index 0000000000..233e7152b9 --- /dev/null +++ b/third_party/aom/test/i420_video_source.h @@ -0,0 +1,34 @@ +/* + * Copyright 
(c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#ifndef AOM_TEST_I420_VIDEO_SOURCE_H_ +#define AOM_TEST_I420_VIDEO_SOURCE_H_ +#include <cstdio> +#include <cstdlib> +#include <string> + +#include "test/yuv_video_source.h" + +namespace libaom_test { + +// This class extends VideoSource to allow parsing of raw yv12 +// so that we can do actual file encodes. +class I420VideoSource : public YUVVideoSource { + public: + I420VideoSource(const std::string &file_name, unsigned int width, + unsigned int height, int rate_numerator, int rate_denominator, + unsigned int start, int limit) + : YUVVideoSource(file_name, AOM_IMG_FMT_I420, width, height, + rate_numerator, rate_denominator, start, limit) {} +}; + +} // namespace libaom_test + +#endif // AOM_TEST_I420_VIDEO_SOURCE_H_ diff --git a/third_party/aom/test/intra_edge_test.cc b/third_party/aom/test/intra_edge_test.cc new file mode 100644 index 0000000000..96ee65466b --- /dev/null +++ b/third_party/aom/test/intra_edge_test.cc @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/register_state_check.h" +#include "test/function_equivalence_test.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" +#include "config/av1_rtcd.h" + +#include "aom/aom_integer.h" +#include "av1/common/enums.h" + +using libaom_test::FunctionEquivalenceTest; + +namespace { + +template <typename F, typename T> +class UpsampleTest : public FunctionEquivalenceTest<F> { + protected: + static const int kIterations = 1000000; + static const int kMinEdge = 4; + static const int kMaxEdge = 24; + static const int kBufSize = 2 * 64 + 32; + static const int kOffset = 16; + + ~UpsampleTest() override = default; + + virtual void Execute(T *edge_tst) = 0; + + void Common() { + edge_ref_ = &edge_ref_data_[kOffset]; + edge_tst_ = &edge_tst_data_[kOffset]; + + Execute(edge_tst_); + + const int max_idx = (size_ - 1) * 2; + for (int r = -2; r <= max_idx; ++r) { + ASSERT_EQ(edge_ref_[r], edge_tst_[r]); + } + } + + T edge_ref_data_[kBufSize]; + T edge_tst_data_[kBufSize]; + + T *edge_ref_; + T *edge_tst_; + + int size_; +}; + +typedef void (*UP8B)(uint8_t *p, int size); +typedef libaom_test::FuncParam<UP8B> TestFuncs; + +class UpsampleTest8B : public UpsampleTest<UP8B, uint8_t> { + protected: + void Execute(uint8_t *edge_tst) override { + params_.ref_func(edge_ref_, size_); + API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_)); + } +}; + +TEST_P(UpsampleTest8B, RandomValues) { + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + size_ = 4 * (this->rng_(4) + 1); + + int i, pix = 0; + for (i = 0; i < kOffset + size_; ++i) { + pix = rng_.Rand8(); + edge_ref_data_[i] = pix; + edge_tst_data_[i] = edge_ref_data_[i]; + 
} + + // Extend final sample + while (i < kBufSize) { + edge_ref_data_[i] = pix; + edge_tst_data_[i] = pix; + i++; + } + + Common(); + } +} + +TEST_P(UpsampleTest8B, DISABLED_Speed) { + const int test_count = 10000000; + size_ = kMaxEdge; + for (int i = 0; i < kOffset + size_; ++i) { + edge_tst_data_[i] = rng_.Rand8(); + } + edge_tst_ = &edge_tst_data_[kOffset]; + for (int iter = 0; iter < test_count; ++iter) { + API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_)); + } +} + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, UpsampleTest8B, + ::testing::Values(TestFuncs(av1_upsample_intra_edge_c, + av1_upsample_intra_edge_sse4_1))); +#endif // HAVE_SSE4_1 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, UpsampleTest8B, + ::testing::Values(TestFuncs(av1_upsample_intra_edge_c, + av1_upsample_intra_edge_neon))); +#endif // HAVE_NEON + +template <typename F, typename T> +class FilterEdgeTest : public FunctionEquivalenceTest<F> { + protected: + static const int kIterations = 1000000; + static const int kMaxEdge = 2 * 64; + static const int kBufSize = kMaxEdge + 32; + static const int kOffset = 15; + + ~FilterEdgeTest() override = default; + + virtual void Execute(T *edge_tst) = 0; + + void Common() { + edge_ref_ = &edge_ref_data_[kOffset]; + edge_tst_ = &edge_tst_data_[kOffset]; + + Execute(edge_tst_); + + for (int r = 0; r < size_; ++r) { + ASSERT_EQ(edge_ref_[r], edge_tst_[r]); + } + } + + T edge_ref_data_[kBufSize]; + T edge_tst_data_[kBufSize]; + + T *edge_ref_; + T *edge_tst_; + + int size_; + int strength_; +}; + +typedef void (*FE8B)(uint8_t *p, int size, int strength); +typedef libaom_test::FuncParam<FE8B> FilterEdgeTestFuncs; + +class FilterEdgeTest8B : public FilterEdgeTest<FE8B, uint8_t> { + protected: + void Execute(uint8_t *edge_tst) override { + params_.ref_func(edge_ref_, size_, strength_); + API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, strength_)); + } +}; + +TEST_P(FilterEdgeTest8B, RandomValues) { + for (int iter = 0; iter < 
kIterations && !HasFatalFailure(); ++iter) { + strength_ = this->rng_(4); + size_ = 4 * (this->rng_(128 / 4) + 1) + 1; + + int i, pix = 0; + for (i = 0; i < kOffset + size_; ++i) { + pix = rng_.Rand8(); + edge_ref_data_[i] = pix; + edge_tst_data_[i] = pix; + } + + Common(); + } +} + +TEST_P(FilterEdgeTest8B, DISABLED_Speed) { + const int test_count = 10000000; + size_ = kMaxEdge; + strength_ = 1; + for (int i = 0; i < kOffset + size_; ++i) { + edge_tst_data_[i] = rng_.Rand8(); + } + edge_tst_ = &edge_tst_data_[kOffset]; + for (int iter = 0; iter < test_count; ++iter) { + API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, strength_)); + // iterate over filter strengths (1,2,3) + strength_ = strength_ == 3 ? 1 : strength_ + 1; + } +} + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, FilterEdgeTest8B, + ::testing::Values(FilterEdgeTestFuncs(av1_filter_intra_edge_c, + av1_filter_intra_edge_sse4_1))); +#endif // HAVE_SSE4_1 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, FilterEdgeTest8B, + ::testing::Values(FilterEdgeTestFuncs(av1_filter_intra_edge_c, + av1_filter_intra_edge_neon))); +#endif // HAVE_NEON + +#if CONFIG_AV1_HIGHBITDEPTH + +typedef void (*UPHB)(uint16_t *p, int size, int bd); +typedef libaom_test::FuncParam<UPHB> TestFuncsHBD; + +class UpsampleTestHB : public UpsampleTest<UPHB, uint16_t> { + protected: + void Execute(uint16_t *edge_tst) override { + params_.ref_func(edge_ref_, size_, bit_depth_); + API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, bit_depth_)); + } + int bit_depth_; +}; + +TEST_P(UpsampleTestHB, RandomValues) { + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + switch (rng_(3)) { + case 0: bit_depth_ = 8; break; + case 1: bit_depth_ = 10; break; + default: bit_depth_ = 12; break; + } + const int hi = 1 << bit_depth_; + + size_ = 4 * (this->rng_(4) + 1); + + int i, pix = 0; + for (i = 0; i < kOffset + size_; ++i) { + pix = rng_(hi); + edge_ref_data_[i] = pix; + edge_tst_data_[i] = pix; + 
} + + // Extend final sample + while (i < kBufSize) { + edge_ref_data_[i] = pix; + edge_tst_data_[i] = pix; + i++; + } + + Common(); + } +} + +TEST_P(UpsampleTestHB, DISABLED_Speed) { + const int test_count = 10000000; + size_ = kMaxEdge; + bit_depth_ = 12; + const int hi = 1 << bit_depth_; + for (int i = 0; i < kOffset + size_; ++i) { + edge_tst_data_[i] = rng_(hi); + } + edge_tst_ = &edge_tst_data_[kOffset]; + for (int iter = 0; iter < test_count; ++iter) { + API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, bit_depth_)); + } +} + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, UpsampleTestHB, + ::testing::Values(TestFuncsHBD(av1_highbd_upsample_intra_edge_c, + av1_highbd_upsample_intra_edge_sse4_1))); +#endif // HAVE_SSE4_1 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, UpsampleTestHB, + ::testing::Values(TestFuncsHBD(av1_highbd_upsample_intra_edge_c, + av1_highbd_upsample_intra_edge_neon))); +#endif // HAVE_NEON + +typedef void (*FEHB)(uint16_t *p, int size, int strength); +typedef libaom_test::FuncParam<FEHB> FilterEdgeTestFuncsHBD; + +class FilterEdgeTestHB : public FilterEdgeTest<FEHB, uint16_t> { + protected: + void Execute(uint16_t *edge_tst) override { + params_.ref_func(edge_ref_, size_, strength_); + API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, strength_)); + } + int bit_depth_; +}; + +TEST_P(FilterEdgeTestHB, RandomValues) { + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + switch (rng_(3)) { + case 0: bit_depth_ = 8; break; + case 1: bit_depth_ = 10; break; + default: bit_depth_ = 12; break; + } + const int hi = 1 << bit_depth_; + strength_ = this->rng_(4); + size_ = 4 * (this->rng_(128 / 4) + 1) + 1; + + int i, pix = 0; + for (i = 0; i < kOffset + size_; ++i) { + pix = rng_(hi); + edge_ref_data_[i] = pix; + edge_tst_data_[i] = pix; + } + + Common(); + } +} + +TEST_P(FilterEdgeTestHB, DISABLED_Speed) { + const int test_count = 10000000; + size_ = kMaxEdge; + strength_ = 1; + bit_depth_ = 12; 
+ const int hi = 1 << bit_depth_; + for (int i = 0; i < kOffset + size_; ++i) { + edge_tst_data_[i] = rng_(hi); + } + edge_tst_ = &edge_tst_data_[kOffset]; + for (int iter = 0; iter < test_count; ++iter) { + API_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, strength_)); + // iterate over filter strengths (1,2,3) + strength_ = strength_ == 3 ? 1 : strength_ + 1; + } +} + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P(SSE4_1, FilterEdgeTestHB, + ::testing::Values(FilterEdgeTestFuncsHBD( + av1_highbd_filter_intra_edge_c, + av1_highbd_filter_intra_edge_sse4_1))); +#endif // HAVE_SSE4_1 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, FilterEdgeTestHB, + ::testing::Values(FilterEdgeTestFuncsHBD( + av1_highbd_filter_intra_edge_c, + av1_highbd_filter_intra_edge_neon))); +#endif // HAVE_NEON + +#endif // CONFIG_AV1_HIGHBITDEPTH + +} // namespace diff --git a/third_party/aom/test/intrabc_test.cc b/third_party/aom/test/intrabc_test.cc new file mode 100644 index 0000000000..2c60596ab8 --- /dev/null +++ b/third_party/aom/test/intrabc_test.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" + +#include "av1/common/av1_common_int.h" +#include "av1/common/blockd.h" +#include "av1/common/enums.h" +#include "av1/common/mv.h" +#include "av1/common/mvref_common.h" +#include "av1/common/tile_common.h" + +namespace { +TEST(IntrabcTest, DvValidation) { + struct DvTestCase { + MV dv; + int mi_row_offset; + int mi_col_offset; + BLOCK_SIZE bsize; + bool valid; + }; + const int kSubPelScale = 8; + const int kTileMaxMibWidth = 8; + const DvTestCase kDvCases[] = { + { { 0, 0 }, 0, 0, BLOCK_128X128, false }, + { { 0, 0 }, 0, 0, BLOCK_64X64, false }, + { { 0, 0 }, 0, 0, BLOCK_32X32, false }, + { { 0, 0 }, 0, 0, BLOCK_16X16, false }, + { { 0, 0 }, 0, 0, BLOCK_8X8, false }, + { { 0, 0 }, 0, 0, BLOCK_4X4, false }, + { { -MAX_SB_SIZE * kSubPelScale, -MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_16X16, + true }, + { { 0, -MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_16X16, + false }, + { { -MAX_SB_SIZE * kSubPelScale, 0 }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_16X16, + true }, + { { MAX_SB_SIZE * kSubPelScale, 0 }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_16X16, + false }, + { { 0, MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_16X16, + false }, + { { -32 * kSubPelScale, -32 * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_32X32, + true }, + { { -32 * kSubPelScale, -32 * kSubPelScale }, + 32 / MI_SIZE, + 32 / MI_SIZE, + BLOCK_32X32, + false }, + { { -32 * kSubPelScale - kSubPelScale / 2, -32 * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_32X32, + false }, + { { -33 * kSubPelScale, -32 * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_32X32, + true }, + { { -32 * kSubPelScale, -32 * kSubPelScale - kSubPelScale / 2 }, + 
MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_32X32, + false }, + { { -32 * kSubPelScale, -33 * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_32X32, + true }, + { { -MAX_SB_SIZE * kSubPelScale, -MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + true }, + { { -(MAX_SB_SIZE + 1) * kSubPelScale, -MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + false }, + { { -MAX_SB_SIZE * kSubPelScale, -(MAX_SB_SIZE + 1) * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + false }, + { { -(MAX_SB_SIZE - 1) * kSubPelScale, -MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + false }, + { { -MAX_SB_SIZE * kSubPelScale, -(MAX_SB_SIZE - 1) * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + true }, + { { -(MAX_SB_SIZE - 1) * kSubPelScale, -(MAX_SB_SIZE - 1) * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + false }, + { { -MAX_SB_SIZE * kSubPelScale, MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + false }, + { { -MAX_SB_SIZE * kSubPelScale, + (kTileMaxMibWidth - 2) * MAX_SB_SIZE * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + false }, + { { -MAX_SB_SIZE * kSubPelScale, + ((kTileMaxMibWidth - 2) * MAX_SB_SIZE + 1) * kSubPelScale }, + MAX_SB_SIZE / MI_SIZE, + MAX_SB_SIZE / MI_SIZE, + BLOCK_LARGEST, + false }, + }; + + MACROBLOCKD xd; + memset(&xd, 0, sizeof(xd)); + xd.tile.mi_row_start = 8 * MAX_MIB_SIZE; + xd.tile.mi_row_end = 16 * MAX_MIB_SIZE; + xd.tile.mi_col_start = 24 * MAX_MIB_SIZE; + xd.tile.mi_col_end = xd.tile.mi_col_start + kTileMaxMibWidth * MAX_MIB_SIZE; + xd.plane[1].subsampling_x = 1; + xd.plane[1].subsampling_y = 1; + xd.plane[2].subsampling_x = 1; + xd.plane[2].subsampling_y = 1; + + SequenceHeader 
seq_params = {}; + AV1_COMMON cm; + memset(&cm, 0, sizeof(cm)); + cm.seq_params = &seq_params; + + for (const DvTestCase &dv_case : kDvCases) { + const int mi_row = xd.tile.mi_row_start + dv_case.mi_row_offset; + const int mi_col = xd.tile.mi_col_start + dv_case.mi_col_offset; + xd.is_chroma_ref = is_chroma_reference(mi_row, mi_col, dv_case.bsize, + xd.plane[1].subsampling_x, + xd.plane[1].subsampling_y); + EXPECT_EQ(static_cast<int>(dv_case.valid), + av1_is_dv_valid(dv_case.dv, &cm, &xd, mi_row, mi_col, + dv_case.bsize, MAX_MIB_SIZE_LOG2)); + } +} +} // namespace diff --git a/third_party/aom/test/intrapred_test.cc b/third_party/aom/test/intrapred_test.cc new file mode 100644 index 0000000000..8796e8ba69 --- /dev/null +++ b/third_party/aom/test/intrapred_test.cc @@ -0,0 +1,488 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/blockd.h" +#include "av1/common/common.h" +#include "av1/common/pred_common.h" +#include "aom_mem/aom_mem.h" + +namespace { + +using libaom_test::ACMRandom; + +const int count_test_block = 100000; + +typedef void (*HighbdIntraPred)(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bps); +typedef void (*IntraPred)(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, + const uint8_t *left); + +} // namespace + +// NOTE: Under gcc version 7.3.0 (Debian 7.3.0-5), if this template is in the +// anonymous namespace, then we get a strange compiler warning in +// the begin() and end() methods of the ParamGenerator template class in +// gtest/internal/gtest-param-util.h: +// warning: ‘<anonymous>’ is used uninitialized in this function +// As a workaround, put this template outside the anonymous namespace. +// See bug aomedia:2003. 
+template <typename FuncType> +struct IntraPredFunc { + IntraPredFunc(FuncType pred = nullptr, FuncType ref = nullptr, + int block_width_value = 0, int block_height_value = 0, + int bit_depth_value = 0) + : pred_fn(pred), ref_fn(ref), block_width(block_width_value), + block_height(block_height_value), bit_depth(bit_depth_value) {} + + FuncType pred_fn; + FuncType ref_fn; + int block_width; + int block_height; + int bit_depth; +}; + +namespace { + +template <typename FuncType, typename Pixel> +class AV1IntraPredTest + : public ::testing::TestWithParam<IntraPredFunc<FuncType> > { + public: + void RunTest(Pixel *left_col, Pixel *above_data, Pixel *dst, Pixel *ref_dst) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int block_width = params_.block_width; + const int block_height = params_.block_height; + above_row_ = above_data + 16; + left_col_ = left_col; + dst_ = dst; + ref_dst_ = ref_dst; + int error_count = 0; + for (int i = 0; i < count_test_block; ++i) { + // Fill edges with random data, try first with saturated values. + for (int x = -1; x <= block_width * 2; x++) { + if (i == 0) { + above_row_[x] = mask_; + } else { + above_row_[x] = rnd.Rand16() & mask_; + } + } + for (int y = 0; y < block_height; y++) { + if (i == 0) { + left_col_[y] = mask_; + } else { + left_col_[y] = rnd.Rand16() & mask_; + } + } + Predict(); + CheckPrediction(i, &error_count); + } + ASSERT_EQ(0, error_count); + } + void RunSpeedTest(Pixel *left_col, Pixel *above_data, Pixel *dst, + Pixel *ref_dst) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int block_width = params_.block_width; + const int block_height = params_.block_height; + above_row_ = above_data + 16; + left_col_ = left_col; + dst_ = dst; + ref_dst_ = ref_dst; + int error_count = 0; + const int numIter = 100; + + int c_sum_time = 0; + int simd_sum_time = 0; + for (int i = 0; i < count_test_block; ++i) { + // Fill edges with random data, try first with saturated values. 
+ for (int x = -1; x <= block_width * 2; x++) { + if (i == 0) { + above_row_[x] = mask_; + } else { + above_row_[x] = rnd.Rand16() & mask_; + } + } + for (int y = 0; y < block_height; y++) { + if (i == 0) { + left_col_[y] = mask_; + } else { + left_col_[y] = rnd.Rand16() & mask_; + } + } + + aom_usec_timer c_timer_; + aom_usec_timer_start(&c_timer_); + + PredictRefSpeedTest(numIter); + + aom_usec_timer_mark(&c_timer_); + + aom_usec_timer simd_timer_; + aom_usec_timer_start(&simd_timer_); + + PredictFncSpeedTest(numIter); + + aom_usec_timer_mark(&simd_timer_); + + c_sum_time += static_cast<int>(aom_usec_timer_elapsed(&c_timer_)); + simd_sum_time += static_cast<int>(aom_usec_timer_elapsed(&simd_timer_)); + + CheckPrediction(i, &error_count); + } + + printf( + "blockWxH = %d x %d c_time = %d \t simd_time = %d \t Gain = %4.2f \n", + block_width, block_height, c_sum_time, simd_sum_time, + (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time))); + ASSERT_EQ(0, error_count); + } + + protected: + void SetUp() override { + params_ = this->GetParam(); + stride_ = params_.block_width * 3; + mask_ = (1 << params_.bit_depth) - 1; + } + + virtual void Predict() = 0; + + virtual void PredictRefSpeedTest(int num) = 0; + virtual void PredictFncSpeedTest(int num) = 0; + + void CheckPrediction(int test_case_number, int *error_count) const { + // For each pixel ensure that the calculated value is the same as reference. 
+ const int block_width = params_.block_width; + const int block_height = params_.block_height; + for (int y = 0; y < block_height; y++) { + for (int x = 0; x < block_width; x++) { + *error_count += ref_dst_[x + y * stride_] != dst_[x + y * stride_]; + if (*error_count == 1) { + ASSERT_EQ(ref_dst_[x + y * stride_], dst_[x + y * stride_]) + << " Failed on Test Case Number " << test_case_number + << " location: x = " << x << " y = " << y; + } + } + } + } + + Pixel *above_row_; + Pixel *left_col_; + Pixel *dst_; + Pixel *ref_dst_; + ptrdiff_t stride_; + int mask_; + + IntraPredFunc<FuncType> params_; +}; + +#if CONFIG_AV1_HIGHBITDEPTH +class HighbdIntraPredTest : public AV1IntraPredTest<HighbdIntraPred, uint16_t> { + protected: + void Predict() override { + const int bit_depth = params_.bit_depth; + params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth); + API_REGISTER_STATE_CHECK( + params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth)); + } + void PredictRefSpeedTest(int num) override { + const int bit_depth = params_.bit_depth; + for (int i = 0; i < num; i++) { + params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth); + } + } + void PredictFncSpeedTest(int num) override { + const int bit_depth = params_.bit_depth; + for (int i = 0; i < num; i++) { + params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth); + } + } +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HighbdIntraPredTest); + +#endif + +class LowbdIntraPredTest : public AV1IntraPredTest<IntraPred, uint8_t> { + protected: + void Predict() override { + params_.ref_fn(ref_dst_, stride_, above_row_, left_col_); + API_REGISTER_STATE_CHECK( + params_.pred_fn(dst_, stride_, above_row_, left_col_)); + } + void PredictRefSpeedTest(int num) override { + for (int i = 0; i < num; i++) { + params_.ref_fn(ref_dst_, stride_, above_row_, left_col_); + } + } + void PredictFncSpeedTest(int num) override { + for (int i = 0; i < num; i++) { + params_.pred_fn(dst_, stride_, 
above_row_, left_col_); + } + } +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(LowbdIntraPredTest); + +#if CONFIG_AV1_HIGHBITDEPTH +TEST_P(HighbdIntraPredTest, Bitexact) { + // max block size is 64 + DECLARE_ALIGNED(16, uint16_t, left_col[2 * 64]); + DECLARE_ALIGNED(16, uint16_t, above_data[2 * 64 + 64]); + DECLARE_ALIGNED(16, uint16_t, dst[3 * 64 * 64]); + DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 64 * 64]); + av1_zero(left_col); + av1_zero(above_data); + RunTest(left_col, above_data, dst, ref_dst); +} + +TEST_P(HighbdIntraPredTest, DISABLED_Speed) { + // max block size is 64 + DECLARE_ALIGNED(16, uint16_t, left_col[2 * 64]); + DECLARE_ALIGNED(16, uint16_t, above_data[2 * 64 + 64]); + DECLARE_ALIGNED(16, uint16_t, dst[3 * 64 * 64]); + DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 64 * 64]); + av1_zero(left_col); + av1_zero(above_data); + RunSpeedTest(left_col, above_data, dst, ref_dst); +} +#endif + +TEST_P(LowbdIntraPredTest, Bitexact) { + // max block size is 64 + DECLARE_ALIGNED(16, uint8_t, left_col[2 * 64]); + DECLARE_ALIGNED(16, uint8_t, above_data[2 * 64 + 64]); + DECLARE_ALIGNED(16, uint8_t, dst[3 * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * 64 * 64]); + av1_zero(left_col); + av1_zero(above_data); + RunTest(left_col, above_data, dst, ref_dst); +} +TEST_P(LowbdIntraPredTest, DISABLED_Speed) { + // max block size is 64 + DECLARE_ALIGNED(16, uint8_t, left_col[2 * 64]); + DECLARE_ALIGNED(16, uint8_t, above_data[2 * 64 + 64]); + DECLARE_ALIGNED(16, uint8_t, dst[3 * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * 64 * 64]); + av1_zero(left_col); + av1_zero(above_data); + RunSpeedTest(left_col, above_data, dst, ref_dst); +} + +#if CONFIG_AV1_HIGHBITDEPTH +// ----------------------------------------------------------------------------- +// High Bit Depth Tests +#define highbd_entry(type, width, height, opt, bd) \ + IntraPredFunc<HighbdIntraPred>( \ + &aom_highbd_##type##_predictor_##width##x##height##_##opt, \ + 
&aom_highbd_##type##_predictor_##width##x##height##_c, width, height, \ + bd) + +#define highbd_intrapred(type, opt, bd) \ + highbd_entry(type, 4, 4, opt, bd), highbd_entry(type, 4, 8, opt, bd), \ + highbd_entry(type, 4, 16, opt, bd), highbd_entry(type, 8, 4, opt, bd), \ + highbd_entry(type, 8, 8, opt, bd), highbd_entry(type, 8, 16, opt, bd), \ + highbd_entry(type, 8, 32, opt, bd), highbd_entry(type, 16, 4, opt, bd), \ + highbd_entry(type, 16, 8, opt, bd), highbd_entry(type, 16, 16, opt, bd), \ + highbd_entry(type, 16, 32, opt, bd), \ + highbd_entry(type, 16, 64, opt, bd), highbd_entry(type, 32, 8, opt, bd), \ + highbd_entry(type, 32, 16, opt, bd), \ + highbd_entry(type, 32, 32, opt, bd), \ + highbd_entry(type, 32, 64, opt, bd), \ + highbd_entry(type, 64, 16, opt, bd), \ + highbd_entry(type, 64, 32, opt, bd), highbd_entry(type, 64, 64, opt, bd) +#endif // CONFIG_AV1_HIGHBITDEPTH + +// --------------------------------------------------------------------------- +// Low Bit Depth Tests + +#define lowbd_entry(type, width, height, opt) \ + IntraPredFunc<IntraPred>(&aom_##type##_predictor_##width##x##height##_##opt, \ + &aom_##type##_predictor_##width##x##height##_c, \ + width, height, 8) + +#define lowbd_intrapred(type, opt) \ + lowbd_entry(type, 4, 4, opt), lowbd_entry(type, 4, 8, opt), \ + lowbd_entry(type, 4, 16, opt), lowbd_entry(type, 8, 4, opt), \ + lowbd_entry(type, 8, 8, opt), lowbd_entry(type, 8, 16, opt), \ + lowbd_entry(type, 8, 32, opt), lowbd_entry(type, 16, 4, opt), \ + lowbd_entry(type, 16, 8, opt), lowbd_entry(type, 16, 16, opt), \ + lowbd_entry(type, 16, 32, opt), lowbd_entry(type, 16, 64, opt), \ + lowbd_entry(type, 32, 8, opt), lowbd_entry(type, 32, 16, opt), \ + lowbd_entry(type, 32, 32, opt), lowbd_entry(type, 32, 64, opt), \ + lowbd_entry(type, 64, 16, opt), lowbd_entry(type, 64, 32, opt), \ + lowbd_entry(type, 64, 64, opt) + +#if HAVE_SSE2 +const IntraPredFunc<IntraPred> LowbdIntraPredTestVector[] = { + lowbd_intrapred(dc, sse2), 
lowbd_intrapred(dc_top, sse2), + lowbd_intrapred(dc_left, sse2), lowbd_intrapred(dc_128, sse2), + lowbd_intrapred(v, sse2), lowbd_intrapred(h, sse2), +}; + +INSTANTIATE_TEST_SUITE_P(SSE2, LowbdIntraPredTest, + ::testing::ValuesIn(LowbdIntraPredTestVector)); +#endif // HAVE_SSE2 + +#if HAVE_NEON +const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorNeon[] = { + lowbd_intrapred(dc, neon), lowbd_intrapred(dc_top, neon), + lowbd_intrapred(dc_left, neon), lowbd_intrapred(dc_128, neon), + lowbd_intrapred(v, neon), lowbd_intrapred(h, neon), + lowbd_intrapred(smooth, neon), lowbd_intrapred(smooth_v, neon), + lowbd_intrapred(smooth_h, neon), lowbd_intrapred(paeth, neon), +}; + +INSTANTIATE_TEST_SUITE_P(NEON, LowbdIntraPredTest, + ::testing::ValuesIn(LowbdIntraPredTestVectorNeon)); +#endif // HAVE_NEON + +#if HAVE_SSSE3 +const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorSsse3[] = { + lowbd_intrapred(paeth, ssse3), + lowbd_intrapred(smooth, ssse3), + lowbd_intrapred(smooth_v, ssse3), + lowbd_intrapred(smooth_h, ssse3), +}; + +INSTANTIATE_TEST_SUITE_P(SSSE3, LowbdIntraPredTest, + ::testing::ValuesIn(LowbdIntraPredTestVectorSsse3)); +#endif // HAVE_SSSE3 + +#if HAVE_AVX2 +const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = { + lowbd_entry(dc, 32, 16, avx2), lowbd_entry(dc, 32, 32, avx2), + lowbd_entry(dc, 32, 64, avx2), lowbd_entry(dc, 64, 16, avx2), + lowbd_entry(dc, 64, 32, avx2), lowbd_entry(dc, 64, 64, avx2), + + lowbd_entry(dc_top, 32, 16, avx2), lowbd_entry(dc_top, 32, 32, avx2), + lowbd_entry(dc_top, 32, 64, avx2), lowbd_entry(dc_top, 64, 16, avx2), + lowbd_entry(dc_top, 64, 32, avx2), lowbd_entry(dc_top, 64, 64, avx2), + + lowbd_entry(dc_left, 32, 16, avx2), lowbd_entry(dc_left, 32, 32, avx2), + lowbd_entry(dc_left, 32, 64, avx2), lowbd_entry(dc_left, 64, 16, avx2), + lowbd_entry(dc_left, 64, 32, avx2), lowbd_entry(dc_left, 64, 64, avx2), + + lowbd_entry(dc_128, 32, 16, avx2), lowbd_entry(dc_128, 32, 32, avx2), + lowbd_entry(dc_128, 32, 64, avx2), 
lowbd_entry(dc_128, 64, 16, avx2), + lowbd_entry(dc_128, 64, 32, avx2), lowbd_entry(dc_128, 64, 64, avx2), + + lowbd_entry(v, 32, 16, avx2), lowbd_entry(v, 32, 32, avx2), + lowbd_entry(v, 32, 64, avx2), lowbd_entry(v, 64, 16, avx2), + lowbd_entry(v, 64, 32, avx2), lowbd_entry(v, 64, 64, avx2), + + lowbd_entry(h, 32, 32, avx2), + + lowbd_entry(paeth, 16, 8, avx2), lowbd_entry(paeth, 16, 16, avx2), + lowbd_entry(paeth, 16, 32, avx2), lowbd_entry(paeth, 16, 64, avx2), + lowbd_entry(paeth, 32, 16, avx2), lowbd_entry(paeth, 32, 32, avx2), + lowbd_entry(paeth, 32, 64, avx2), lowbd_entry(paeth, 64, 16, avx2), + lowbd_entry(paeth, 64, 32, avx2), lowbd_entry(paeth, 64, 64, avx2), +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, LowbdIntraPredTest, + ::testing::ValuesIn(LowbdIntraPredTestVectorAvx2)); +#endif // HAVE_AVX2 + +#if CONFIG_AV1_HIGHBITDEPTH +#if HAVE_NEON +const IntraPredFunc<HighbdIntraPred> HighbdIntraPredTestVectorNeon[] = { + highbd_intrapred(dc, neon, 12), highbd_intrapred(dc_top, neon, 12), + highbd_intrapred(dc_left, neon, 12), highbd_intrapred(dc_128, neon, 12), + highbd_intrapred(v, neon, 12), highbd_intrapred(h, neon, 12), + highbd_intrapred(paeth, neon, 12), highbd_intrapred(smooth, neon, 12), + highbd_intrapred(smooth_v, neon, 12), highbd_intrapred(smooth_h, neon, 12), +}; + +INSTANTIATE_TEST_SUITE_P(NEON, HighbdIntraPredTest, + ::testing::ValuesIn(HighbdIntraPredTestVectorNeon)); +#endif // HAVE_NEON + +#if HAVE_SSE2 +const IntraPredFunc<HighbdIntraPred> HighbdIntraPredTestVectorSse2[] = { + highbd_entry(dc, 4, 4, sse2, 12), + highbd_entry(dc, 4, 8, sse2, 12), + highbd_entry(dc, 8, 4, sse2, 12), + highbd_entry(dc, 8, 8, sse2, 12), + highbd_entry(dc, 8, 16, sse2, 12), + highbd_entry(dc, 16, 8, sse2, 12), + highbd_entry(dc, 16, 16, sse2, 12), + highbd_entry(dc, 16, 32, sse2, 12), + highbd_entry(dc, 32, 16, sse2, 12), + highbd_entry(dc, 32, 32, sse2, 12), + + highbd_entry(dc_top, 4, 4, sse2, 12), + highbd_entry(dc_top, 4, 8, sse2, 12), + highbd_entry(dc_top, 8, 4, 
sse2, 12), + highbd_entry(dc_top, 8, 8, sse2, 12), + highbd_entry(dc_top, 8, 16, sse2, 12), + highbd_entry(dc_top, 16, 8, sse2, 12), + highbd_entry(dc_top, 16, 16, sse2, 12), + highbd_entry(dc_top, 16, 32, sse2, 12), + highbd_entry(dc_top, 32, 16, sse2, 12), + highbd_entry(dc_top, 32, 32, sse2, 12), + + highbd_entry(dc_left, 4, 4, sse2, 12), + highbd_entry(dc_left, 4, 8, sse2, 12), + highbd_entry(dc_left, 8, 4, sse2, 12), + highbd_entry(dc_left, 8, 8, sse2, 12), + highbd_entry(dc_left, 8, 16, sse2, 12), + highbd_entry(dc_left, 16, 8, sse2, 12), + highbd_entry(dc_left, 16, 16, sse2, 12), + highbd_entry(dc_left, 16, 32, sse2, 12), + highbd_entry(dc_left, 32, 16, sse2, 12), + highbd_entry(dc_left, 32, 32, sse2, 12), + + highbd_entry(dc_128, 4, 4, sse2, 12), + highbd_entry(dc_128, 4, 8, sse2, 12), + highbd_entry(dc_128, 8, 4, sse2, 12), + highbd_entry(dc_128, 8, 8, sse2, 12), + highbd_entry(dc_128, 8, 16, sse2, 12), + highbd_entry(dc_128, 16, 8, sse2, 12), + highbd_entry(dc_128, 16, 16, sse2, 12), + highbd_entry(dc_128, 16, 32, sse2, 12), + highbd_entry(dc_128, 32, 16, sse2, 12), + highbd_entry(dc_128, 32, 32, sse2, 12), + + highbd_entry(v, 4, 4, sse2, 12), + highbd_entry(v, 4, 8, sse2, 12), + highbd_entry(v, 8, 4, sse2, 12), + highbd_entry(v, 8, 8, sse2, 12), + highbd_entry(v, 8, 16, sse2, 12), + highbd_entry(v, 16, 8, sse2, 12), + highbd_entry(v, 16, 16, sse2, 12), + highbd_entry(v, 16, 32, sse2, 12), + highbd_entry(v, 32, 16, sse2, 12), + highbd_entry(v, 32, 32, sse2, 12), + + highbd_entry(h, 4, 4, sse2, 12), + highbd_entry(h, 4, 8, sse2, 12), + highbd_entry(h, 8, 4, sse2, 12), + highbd_entry(h, 8, 8, sse2, 12), + highbd_entry(h, 8, 16, sse2, 12), + highbd_entry(h, 16, 8, sse2, 12), + highbd_entry(h, 16, 16, sse2, 12), + highbd_entry(h, 16, 32, sse2, 12), + highbd_entry(h, 32, 16, sse2, 12), + highbd_entry(h, 32, 32, sse2, 12), +}; + +INSTANTIATE_TEST_SUITE_P(SSE2, HighbdIntraPredTest, + ::testing::ValuesIn(HighbdIntraPredTestVectorSse2)); +#endif // HAVE_SSE2 
+#endif // CONFIG_AV1_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/invalid_file_test.cc b/third_party/aom/test/invalid_file_test.cc new file mode 100644 index 0000000000..791cdb8928 --- /dev/null +++ b/third_party/aom/test/invalid_file_test.cc @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <cstdio> +#include <ostream> +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/ivf_video_source.h" +#include "test/util.h" +#include "test/video_source.h" + +namespace { + +struct DecodeParam { + int threads; + const char *filename; + const char *res_filename; // If nullptr, the result filename is + // filename + ".res". +}; + +// Constructs result file name. 
+std::string GetResFilename(const DecodeParam &param) { + if (param.res_filename != nullptr) return param.res_filename; + const std::string filename = param.filename; + return filename + ".res"; +} + +std::ostream &operator<<(std::ostream &os, const DecodeParam &dp) { + return os << "threads: " << dp.threads << " file: " << dp.filename + << " result file: " << GetResFilename(dp); +} + +class InvalidFileTest : public ::libaom_test::DecoderTest, + public ::libaom_test::CodecTestWithParam<DecodeParam> { + protected: + InvalidFileTest() : DecoderTest(GET_PARAM(0)), res_file_(nullptr) {} + + ~InvalidFileTest() override { + if (res_file_ != nullptr) fclose(res_file_); + } + + void OpenResFile(const std::string &res_file_name) { + res_file_ = libaom_test::OpenTestDataFile(res_file_name); + ASSERT_NE(res_file_, nullptr) + << "Result file open failed. Filename: " << res_file_name; + } + + void DecompressedFrameHook(const aom_image_t &img, + const unsigned int /*frame_number*/) override { + EXPECT_NE(img.fb_priv, nullptr); + } + + bool HandleDecodeResult(const aom_codec_err_t res_dec, + const libaom_test::CompressedVideoSource &video, + libaom_test::Decoder *decoder) override { + EXPECT_NE(res_file_, nullptr); + int expected_res_dec = -1; + + // Read integer result. + const int res = fscanf(res_file_, "%d", &expected_res_dec); + EXPECT_NE(res, EOF) << "Read result data failed"; + + if (expected_res_dec != -1) { + // Check results match. + const DecodeParam input = GET_PARAM(1); + if (input.threads > 1) { + // The serial decode check is too strict for tile-threaded decoding as + // there is no guarantee on the decode order nor which specific error + // will take precedence. Currently a tile-level error is not forwarded + // so the frame will simply be marked corrupt. + EXPECT_TRUE(res_dec == expected_res_dec || + res_dec == AOM_CODEC_CORRUPT_FRAME) + << "Results don't match: frame number = " << video.frame_number() + << ". (" << decoder->DecodeError() + << "). 
Expected: " << expected_res_dec << " or " + << AOM_CODEC_CORRUPT_FRAME; + } else { + EXPECT_EQ(expected_res_dec, res_dec) + << "Results don't match: frame number = " << video.frame_number() + << ". (" << decoder->DecodeError() << ")"; + } + } + + return !HasFailure(); + } + + void HandlePeekResult(libaom_test::Decoder *const /*decoder*/, + libaom_test::CompressedVideoSource * /*video*/, + const aom_codec_err_t /*res_peek*/) override {} + + void RunTest() { + const DecodeParam input = GET_PARAM(1); + aom_codec_dec_cfg_t cfg = { 0, 0, 0, !FORCE_HIGHBITDEPTH_DECODING }; + cfg.threads = input.threads; + libaom_test::IVFVideoSource decode_video(input.filename); + decode_video.Init(); + + // The result file holds a list of expected integer results, one for each + // decoded frame. Any result that doesn't match the file's list will + // cause a test failure. + const std::string res_filename = GetResFilename(input); + OpenResFile(res_filename); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&decode_video, cfg)); + } + + private: + FILE *res_file_; +}; + +TEST_P(InvalidFileTest, ReturnCode) { RunTest(); } + +// If res_filename (the third field) is nullptr, then the result filename is +// filename + ".res" by default. Set res_filename to a string if the result +// filename differs from the default. 
+const DecodeParam kAV1InvalidFileTests[] = { + // { threads, filename, res_filename } + { 1, "invalid-bug-1814.ivf", nullptr }, + { 1, "invalid-chromium-906381.ivf", nullptr }, + { 1, "invalid-google-142530197.ivf", nullptr }, + { 1, "invalid-google-142530197-1.ivf", nullptr }, + { 4, "invalid-oss-fuzz-9463.ivf", "invalid-oss-fuzz-9463.ivf.res.2" }, + { 1, "invalid-oss-fuzz-9720.ivf", nullptr }, + { 1, "invalid-oss-fuzz-10389.ivf", "invalid-oss-fuzz-10389.ivf.res.4" }, +#if !CHROMIUM && !CONFIG_SIZE_LIMIT || \ + (CONFIG_SIZE_LIMIT && DECODE_WIDTH_LIMIT >= 5120 && \ + DECODE_HEIGHT_LIMIT >= 180) + { 1, "invalid-oss-fuzz-11523.ivf", "invalid-oss-fuzz-11523.ivf.res.2" }, +#endif + { 4, "invalid-oss-fuzz-15363.ivf", nullptr }, + { 1, "invalid-oss-fuzz-16437.ivf", "invalid-oss-fuzz-16437.ivf.res.2" }, +#if CONFIG_MAX_DECODE_PROFILE >= 1 + { 1, "invalid-oss-fuzz-24706.ivf", nullptr }, +#endif +#if CONFIG_AV1_HIGHBITDEPTH + // These test vectors contain 10-bit or 12-bit video. + { 1, "invalid-oss-fuzz-9288.ivf", nullptr }, + { 1, "invalid-oss-fuzz-9482.ivf", nullptr }, + { 1, "invalid-oss-fuzz-10061.ivf", nullptr }, + { 1, "invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf", nullptr }, + { 1, "invalid-oss-fuzz-10227.ivf", nullptr }, + { 4, "invalid-oss-fuzz-10555.ivf", nullptr }, + { 1, "invalid-oss-fuzz-10705.ivf", nullptr }, +#if CONFIG_CWG_C013 + { 1, "invalid-oss-fuzz-10723.ivf", "invalid-oss-fuzz-10723.ivf.res.3" }, +#else + { 1, "invalid-oss-fuzz-10723.ivf", "invalid-oss-fuzz-10723.ivf.res.2" }, +#endif + { 1, "invalid-oss-fuzz-10779.ivf", nullptr }, + { 1, "invalid-oss-fuzz-11477.ivf", nullptr }, + { 1, "invalid-oss-fuzz-11479.ivf", "invalid-oss-fuzz-11479.ivf.res.2" }, + { 1, "invalid-oss-fuzz-33030.ivf", nullptr }, +#endif +}; + +AV1_INSTANTIATE_TEST_SUITE(InvalidFileTest, + ::testing::ValuesIn(kAV1InvalidFileTests)); + +} // namespace diff --git a/third_party/aom/test/ivf_video_source.h b/third_party/aom/test/ivf_video_source.h new file mode 100644 index 
0000000000..85731f5566 --- /dev/null +++ b/third_party/aom/test/ivf_video_source.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#ifndef AOM_TEST_IVF_VIDEO_SOURCE_H_ +#define AOM_TEST_IVF_VIDEO_SOURCE_H_ + +#include <cstdio> +#include <cstdlib> +#include <new> +#include <string> + +#include "aom_ports/sanitizer.h" +#include "test/video_source.h" + +namespace libaom_test { +const unsigned int kCodeBufferSize = 256 * 1024 * 1024; +const unsigned int kIvfFileHdrSize = 32; +const unsigned int kIvfFrameHdrSize = 12; + +static unsigned int MemGetLe32(const uint8_t *mem) { + return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]); +} + +// This class extends VideoSource to allow parsing of ivf files, +// so that we can do actual file decodes. +class IVFVideoSource : public CompressedVideoSource { + public: + explicit IVFVideoSource(const std::string &file_name) + : file_name_(file_name), input_file_(nullptr), + compressed_frame_buf_(nullptr), frame_sz_(0), frame_(0), + end_of_file_(false) {} + + ~IVFVideoSource() override { + delete[] compressed_frame_buf_; + + if (input_file_) fclose(input_file_); + } + + void Init() override { + // Allocate a buffer for read in the compressed video frame. 
+ compressed_frame_buf_ = new uint8_t[kCodeBufferSize]; + ASSERT_NE(compressed_frame_buf_, nullptr) << "Allocate frame buffer failed"; + ASAN_POISON_MEMORY_REGION(compressed_frame_buf_, kCodeBufferSize); + } + + void Begin() override { + input_file_ = OpenTestDataFile(file_name_); + ASSERT_NE(input_file_, nullptr) + << "Input file open failed. Filename: " << file_name_; + + // Read file header + uint8_t file_hdr[kIvfFileHdrSize]; + ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_)) + << "File header read failed."; + // Check file header + ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' && + file_hdr[2] == 'I' && file_hdr[3] == 'F') + << "Input is not an IVF file."; + + FillFrame(); + } + + void Next() override { + ++frame_; + FillFrame(); + } + + void FillFrame() { + ASSERT_NE(input_file_, nullptr); + uint8_t frame_hdr[kIvfFrameHdrSize]; + // Check frame header and read a frame from input_file. + if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_) != + kIvfFrameHdrSize) { + end_of_file_ = true; + } else { + end_of_file_ = false; + + frame_sz_ = MemGetLe32(frame_hdr); + ASSERT_LE(frame_sz_, kCodeBufferSize) + << "Frame is too big for allocated code buffer"; + ASAN_UNPOISON_MEMORY_REGION(compressed_frame_buf_, kCodeBufferSize); + ASSERT_EQ(frame_sz_, + fread(compressed_frame_buf_, 1, frame_sz_, input_file_)) + << "Failed to read complete frame"; + ASAN_POISON_MEMORY_REGION(compressed_frame_buf_ + frame_sz_, + kCodeBufferSize - frame_sz_); + } + } + + const uint8_t *cxdata() const override { + return end_of_file_ ? 
nullptr : compressed_frame_buf_; + } + size_t frame_size() const override { return frame_sz_; } + unsigned int frame_number() const override { return frame_; } + + protected: + std::string file_name_; + FILE *input_file_; + uint8_t *compressed_frame_buf_; + size_t frame_sz_; + unsigned int frame_; + bool end_of_file_; +}; + +} // namespace libaom_test + +#endif // AOM_TEST_IVF_VIDEO_SOURCE_H_ diff --git a/third_party/aom/test/kf_test.cc b/third_party/aom/test/kf_test.cc new file mode 100644 index 0000000000..7d8cbfe8c6 --- /dev/null +++ b/third_party/aom/test/kf_test.cc @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2020, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <string.h> + +#include <ostream> + +#include "aom/aom_codec.h" +#include "aom/aom_encoder.h" +#include "aom/aom_image.h" +#include "aom/aomcx.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +#define NUM_LAG_VALUES 3 + +namespace { +aom_image_t *CreateGrayImage(aom_img_fmt_t fmt, unsigned int w, + unsigned int h) { + aom_image_t *const image = aom_img_alloc(nullptr, fmt, w, h, 1); + if (!image) return image; + + for (unsigned int i = 0; i < image->d_h; ++i) { + memset(image->planes[0] + i * image->stride[0], 128, image->d_w); + } + const unsigned int uv_h = (image->d_h + 1) / 2; + const unsigned int uv_w = (image->d_w + 1) / 2; + for (unsigned int i = 0; i < uv_h; ++i) { + memset(image->planes[1] + i * image->stride[1], 128, uv_w); + memset(image->planes[2] + i * image->stride[2], 128, uv_w); + } + return image; +} + +// Tests kf_max_dist in one-pass encoding with zero lag. +void TestKeyFrameMaximumInterval(unsigned int usage, unsigned int kf_max_dist) { + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + ASSERT_EQ(aom_codec_enc_config_default(iface, &cfg, usage), AOM_CODEC_OK); + cfg.g_w = 320; + cfg.g_h = 240; + cfg.g_pass = AOM_RC_ONE_PASS; + cfg.g_lag_in_frames = 0; + cfg.kf_mode = AOM_KF_AUTO; + cfg.kf_min_dist = 0; + cfg.kf_max_dist = kf_max_dist; + + aom_codec_ctx_t enc; + ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK); + + ASSERT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 6), AOM_CODEC_OK); + + aom_image_t *image = CreateGrayImage(AOM_IMG_FMT_I420, cfg.g_w, cfg.g_h); + ASSERT_NE(image, nullptr); + + // Encode frames. + const aom_codec_cx_pkt_t *pkt; + const unsigned int num_frames = kf_max_dist == 0 ? 
4 : 3 * kf_max_dist + 1; + for (unsigned int i = 0; i < num_frames; ++i) { + ASSERT_EQ(aom_codec_encode(&enc, image, i, 1, 0), AOM_CODEC_OK); + aom_codec_iter_t iter = nullptr; + while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + if (kf_max_dist == 0 || i % kf_max_dist == 0) { + ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + } else { + ASSERT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u); + } + } + } + + // Flush the encoder. + bool got_data; + do { + ASSERT_EQ(aom_codec_encode(&enc, nullptr, 0, 1, 0), AOM_CODEC_OK); + got_data = false; + aom_codec_iter_t iter = nullptr; + while ((pkt = aom_codec_get_cx_data(&enc, &iter)) != nullptr) { + ASSERT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + got_data = true; + } + } while (got_data); + + aom_img_free(image); + ASSERT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} + +TEST(KeyFrameIntervalTest, KeyFrameMaximumInterval) { + for (unsigned int usage : { AOM_USAGE_GOOD_QUALITY, AOM_USAGE_REALTIME }) { + // Test 0 and 1 (both mean all intra), some powers of 2, some multiples of + // 10, and some prime numbers. + for (unsigned int kf_max_dist : + { 0, 1, 2, 3, 4, 7, 10, 13, 16, 20, 23, 29, 32 }) { + TestKeyFrameMaximumInterval(usage, kf_max_dist); + } + } +} + +typedef struct { + const unsigned int min_kf_dist; + const unsigned int max_kf_dist; +} kfIntervalParam; + +const kfIntervalParam kfTestParams[] = { + { 1, 1 }, { 0, 10 }, { 10, 10 }, { 0, 30 }, { 30, 30 } +}; + +std::ostream &operator<<(std::ostream &os, const kfIntervalParam &test_arg) { + return os << "kfIntervalParam { min_kf_dist:" << test_arg.min_kf_dist + << " max_kf_dist:" << test_arg.max_kf_dist << " }"; +} + +// This class is used to test the presence of forward key frame. 
+class KeyFrameIntervalTestLarge + : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, + kfIntervalParam, aom_rc_mode>, + public ::libaom_test::EncoderTest { + protected: + KeyFrameIntervalTestLarge() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + kf_dist_param_(GET_PARAM(2)), end_usage_check_(GET_PARAM(3)) { + kf_dist_ = -1; + is_kf_interval_violated_ = false; + } + ~KeyFrameIntervalTestLarge() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = end_usage_check_; + cfg_.g_threads = 1; + cfg_.kf_min_dist = kf_dist_param_.min_kf_dist; + cfg_.kf_max_dist = kf_dist_param_.max_kf_dist; + cfg_.g_lag_in_frames = 19; + } + + bool DoDecode() const override { return true; } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 5); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + } + } + + bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) override { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + if (AOM_CODEC_OK == res_dec) { + aom_codec_ctx_t *ctx_dec = decoder->GetDecoder(); + int frame_flags = 0; + AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_FRAME_FLAGS, + &frame_flags); + if (kf_dist_ != -1) { + kf_dist_++; + if (kf_dist_ > (int)kf_dist_param_.max_kf_dist) { + is_kf_interval_violated_ = true; + } + } + if ((frame_flags & AOM_FRAME_IS_KEY) == AOM_FRAME_IS_KEY) { + if (kf_dist_ != -1 && kf_dist_ < (int)kf_dist_param_.min_kf_dist) { + is_kf_interval_violated_ = true; + } + kf_dist_ = 0; + } + } + return AOM_CODEC_OK == res_dec; + } + + ::libaom_test::TestMode encoding_mode_; + const kfIntervalParam kf_dist_param_; + int kf_dist_; + bool is_kf_interval_violated_; + aom_rc_mode end_usage_check_; +}; + +// Because valgrind builds take a very 
long time to run, use a lower +// resolution video for valgrind runs. +const char *TestFileName() { +#if AOM_VALGRIND_BUILD + return "hantro_collage_w176h144.yuv"; +#else + return "hantro_collage_w352h288.yuv"; +#endif // AOM_VALGRIND_BUILD +} + +int TestFileWidth() { +#if AOM_VALGRIND_BUILD + return 176; +#else + return 352; +#endif // AOM_VALGRIND_BUILD +} + +int TestFileHeight() { +#if AOM_VALGRIND_BUILD + return 144; +#else + return 288; +#endif // AOM_VALGRIND_BUILD +} + +TEST_P(KeyFrameIntervalTestLarge, KeyFrameIntervalTest) { + libaom_test::I420VideoSource video(TestFileName(), TestFileWidth(), + TestFileHeight(), cfg_.g_timebase.den, + cfg_.g_timebase.num, 0, 75); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(is_kf_interval_violated_, false) << kf_dist_param_; +} + +// This class tests for presence and placement of application forced key frames. +class ForcedKeyTestLarge + : public ::libaom_test::CodecTestWith5Params<libaom_test::TestMode, int, + int, int, aom_rc_mode>, + public ::libaom_test::EncoderTest { + protected: + ForcedKeyTestLarge() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + auto_alt_ref_(GET_PARAM(2)), fwd_kf_enabled_(GET_PARAM(3)), + cpu_used_(GET_PARAM(4)), rc_end_usage_(GET_PARAM(5)) { + forced_kf_frame_num_ = 1; + frame_num_ = 0; + is_kf_placement_violated_ = false; + } + ~ForcedKeyTestLarge() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + cfg_.rc_end_usage = rc_end_usage_; + cfg_.g_threads = 0; + cfg_.kf_max_dist = 30; + cfg_.kf_min_dist = 0; + cfg_.fwd_kf_enabled = fwd_kf_enabled_; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, auto_alt_ref_); +#if CONFIG_AV1_ENCODER + // override test default for tile columns if necessary. 
+ if (GET_PARAM(0) == &libaom_test::kAV1) { + encoder->Control(AV1E_SET_TILE_COLUMNS, 6); + } +#endif + } + frame_flags_ = + ((int)video->frame() == forced_kf_frame_num_) ? AOM_EFLAG_FORCE_KF : 0; + } + + bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) override { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + if (AOM_CODEC_OK == res_dec) { + if ((int)frame_num_ == forced_kf_frame_num_) { + aom_codec_ctx_t *ctx_dec = decoder->GetDecoder(); + int frame_flags = 0; + AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_FRAME_FLAGS, + &frame_flags); + if ((frame_flags & AOM_FRAME_IS_KEY) != AOM_FRAME_IS_KEY) { + is_kf_placement_violated_ = true; + } + } + ++frame_num_; + } + return AOM_CODEC_OK == res_dec; + } + + void Frame1IsKey(); + void ForcedFrameIsKey(); + void ForcedFrameIsKeyCornerCases(); + + ::libaom_test::TestMode encoding_mode_; + int auto_alt_ref_; + int fwd_kf_enabled_; + int cpu_used_; + aom_rc_mode rc_end_usage_; + int forced_kf_frame_num_; + unsigned int frame_num_; + bool is_kf_placement_violated_; +}; + +void ForcedKeyTestLarge::Frame1IsKey() { + const aom_rational timebase = { 1, 30 }; + // 1st element of this 2D array is for good encoding mode and 2nd element + // is for RT encoding mode. + const int lag_values[2][NUM_LAG_VALUES] = { { 3, 15, 25 }, { 0, -1, -1 } }; + int is_realtime = (encoding_mode_ == ::libaom_test::kRealTime); + + forced_kf_frame_num_ = 1; + for (int i = 0; i < NUM_LAG_VALUES; ++i) { + if (lag_values[is_realtime][i] == -1) continue; + frame_num_ = 0; + cfg_.g_lag_in_frames = lag_values[is_realtime][i]; + is_kf_placement_violated_ = false; + libaom_test::I420VideoSource video( + TestFileName(), TestFileWidth(), TestFileHeight(), timebase.den, + timebase.num, 0, fwd_kf_enabled_ ? 
60 : 30); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(is_kf_placement_violated_, false) + << "Frame #" << frame_num_ << " isn't a keyframe!"; + } +} + +// This class checks the presence and placement of application +// forced key frames. +void ForcedKeyTestLarge::ForcedFrameIsKey() { + const aom_rational timebase = { 1, 30 }; + const int lag_values[] = { 3, 15, 25, -1 }; + + for (int i = 0; lag_values[i] != -1; ++i) { + frame_num_ = 0; + forced_kf_frame_num_ = lag_values[i] - 1; + cfg_.g_lag_in_frames = lag_values[i]; + is_kf_placement_violated_ = false; + libaom_test::I420VideoSource video( + TestFileName(), TestFileWidth(), TestFileHeight(), timebase.den, + timebase.num, 0, fwd_kf_enabled_ ? 60 : 30); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(is_kf_placement_violated_, false) + << "Frame #" << frame_num_ << " isn't a keyframe!"; + + // Two pass and single pass CBR are currently segfaulting for the case when + // forced kf is placed after lag in frames. + // TODO(anyone): Enable(uncomment) below test once above bug is fixed. + // frame_num_ = 0; + // forced_kf_frame_num_ = lag_values[i] + 1; + // cfg_.g_lag_in_frames = lag_values[i]; + // is_kf_placement_violated_ = false; + // ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // ASSERT_EQ(is_kf_placement_violated_, false) + // << "Frame #" << frame_num_ << " isn't a keyframe!"; + } +} + +void ForcedKeyTestLarge::ForcedFrameIsKeyCornerCases() { + const aom_rational timebase = { 1, 30 }; + const int kf_offsets[] = { -2, -1, 1, 2, 0 }; + cfg_.g_lag_in_frames = 35; + if (encoding_mode_ == ::libaom_test::kRealTime) cfg_.g_lag_in_frames = 0; + + for (int i = 0; kf_offsets[i] != 0; ++i) { + frame_num_ = 0; + forced_kf_frame_num_ = (int)cfg_.kf_max_dist + kf_offsets[i]; + forced_kf_frame_num_ = forced_kf_frame_num_ > 0 ? 
forced_kf_frame_num_ : 1; + is_kf_placement_violated_ = false; + libaom_test::I420VideoSource video( + TestFileName(), TestFileWidth(), TestFileHeight(), timebase.den, + timebase.num, 0, fwd_kf_enabled_ ? 60 : 30); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(is_kf_placement_violated_, false) + << "Frame #" << frame_num_ << " isn't a keyframe!"; + } +} + +AV1_INSTANTIATE_TEST_SUITE(KeyFrameIntervalTestLarge, + testing::Values(::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood), + ::testing::ValuesIn(kfTestParams), + ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ)); + +TEST_P(ForcedKeyTestLarge, Frame1IsKey) { Frame1IsKey(); } +TEST_P(ForcedKeyTestLarge, ForcedFrameIsKey) { ForcedFrameIsKey(); } +TEST_P(ForcedKeyTestLarge, ForcedFrameIsKeyCornerCases) { + ForcedFrameIsKeyCornerCases(); +} + +class ForcedKeyRTTestLarge : public ForcedKeyTestLarge {}; + +TEST_P(ForcedKeyRTTestLarge, Frame1IsKey) { Frame1IsKey(); } +TEST_P(ForcedKeyRTTestLarge, ForcedFrameIsKeyCornerCases) { + ForcedFrameIsKeyCornerCases(); +} +// TODO(anyone): Add CBR to list of rc_modes once forced kf placement after +// lag in frames bug is fixed. +AV1_INSTANTIATE_TEST_SUITE(ForcedKeyTestLarge, + ::testing::Values(::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood), + ::testing::Values(0, 1), ::testing::Values(0, 1), + ::testing::Values(2, 5), + ::testing::Values(AOM_Q, AOM_VBR, AOM_CQ)); +AV1_INSTANTIATE_TEST_SUITE(ForcedKeyRTTestLarge, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Values(0), ::testing::Values(0), + ::testing::Values(7, 9), + ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR)); +} // namespace diff --git a/third_party/aom/test/level_test.cc b/third_party/aom/test/level_test.cc new file mode 100644 index 0000000000..a7c26d2305 --- /dev/null +++ b/third_party/aom/test/level_test.cc @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#include <memory> +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" + +namespace { +const int kLevelMin = 0; +const int kLevelMax = 31; +const int kLevelKeepStats = 32; +// Speed settings tested +static const int kCpuUsedVectors[] = { + 1, + 2, + 3, + 4, +}; + +class LevelTest + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>, + public ::libaom_test::EncoderTest { + protected: + LevelTest() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), target_level_(31) {} + + ~LevelTest() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + cfg_.g_lag_in_frames = 5; + } else { + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + } + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AV1E_SET_TARGET_SEQ_LEVEL_IDX, target_level_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 
5); + } + } + + int num_operating_points; + encoder->Control(AV1E_GET_NUM_OPERATING_POINTS, &num_operating_points); + ASSERT_EQ(num_operating_points, 1); + encoder->Control(AV1E_GET_SEQ_LEVEL_IDX, level_); + ASSERT_LE(level_[0], kLevelMax); + ASSERT_GE(level_[0], kLevelMin); + } + + libaom_test::TestMode encoding_mode_; + int cpu_used_; + int target_level_; + int level_[32]; +}; + +TEST(LevelTest, TestTargetLevelApi) { + aom_codec_iface_t *codec = aom_codec_av1_cx(); + aom_codec_ctx_t enc; + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(codec, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, codec, &cfg, 0)); + for (int operating_point = 0; operating_point <= 32; ++operating_point) { + for (int level = 0; level <= 32; ++level) { + const int target_level = operating_point * 100 + level; + if (operating_point <= 31 && + ((level < (CONFIG_CWG_C013 ? 28 : 20) && level != 2 && level != 3 && + level != 6 && level != 7 && level != 10 && level != 11) || + level == kLevelMax || level == kLevelKeepStats)) { + EXPECT_EQ(AOM_CODEC_OK, + AOM_CODEC_CONTROL_TYPECHECKED( + &enc, AV1E_SET_TARGET_SEQ_LEVEL_IDX, target_level)); + } else { + EXPECT_EQ(AOM_CODEC_INVALID_PARAM, + AOM_CODEC_CONTROL_TYPECHECKED( + &enc, AV1E_SET_TARGET_SEQ_LEVEL_IDX, target_level)); + } + } + } + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +TEST(LevelTest, InvalidOperatingPointIndexErrorDetail) { + aom_codec_iface_t *codec = aom_codec_av1_cx(); + aom_codec_ctx_t enc; + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(aom_codec_enc_config_default(codec, &cfg, 0), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_enc_init(&enc, codec, &cfg, 0), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TARGET_SEQ_LEVEL_IDX, 3219), + AOM_CODEC_INVALID_PARAM); + EXPECT_EQ(aom_codec_error_detail(&enc), + std::string("Invalid operating point index: 32")); + EXPECT_EQ(aom_codec_set_option(&enc, "target-seq-level-idx", "3319"), + AOM_CODEC_INVALID_PARAM); + 
EXPECT_EQ(aom_codec_error_detail(&enc), + std::string("Invalid operating point index: 33")); + EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} + +TEST_P(LevelTest, TestTargetLevel19) { + std::unique_ptr<libaom_test::VideoSource> video; + video.reset(new libaom_test::Y4mVideoSource("park_joy_90p_8_420.y4m", 0, 10)); + ASSERT_NE(video, nullptr); + // Level index 19 corresponding to level 6.3. + target_level_ = 19; + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); +} + +TEST_P(LevelTest, TestLevelMonitoringLowBitrate) { + // To save run time, we only test speed 4. + if (cpu_used_ == 4) { + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 40); + target_level_ = kLevelKeepStats; + cfg_.rc_target_bitrate = 1000; + cfg_.g_limit = 40; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(level_[0], 0); + } +} + +TEST_P(LevelTest, TestLevelMonitoringHighBitrate) { + // To save run time, we only test speed 4. + if (cpu_used_ == 4) { + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 40); + target_level_ = kLevelKeepStats; + cfg_.rc_target_bitrate = 4000; + cfg_.g_limit = 40; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(level_[0], 4); + } +} + +TEST_P(LevelTest, TestTargetLevel0) { + // To save run time, we only test speed 4. 
+ if (cpu_used_ == 4) { + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 50); + const int target_level = 0; + target_level_ = target_level; + cfg_.rc_target_bitrate = 4000; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(level_[0], target_level); + } +} + +TEST_P(LevelTest, TestTargetLevelRecode) { + if (cpu_used_ == 4 && encoding_mode_ == ::libaom_test::kTwoPassGood) { + libaom_test::I420VideoSource video("rand_noise_w1280h720.yuv", 1280, 720, + 25, 1, 0, 10); + const int target_level = 0005; + target_level_ = target_level; + cfg_.rc_target_bitrate = 5000; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } +} + +AV1_INSTANTIATE_TEST_SUITE(LevelTest, + ::testing::Values(::libaom_test::kTwoPassGood, + ::libaom_test::kOnePassGood), + ::testing::ValuesIn(kCpuUsedVectors)); +} // namespace diff --git a/third_party/aom/test/lightfield_test.sh b/third_party/aom/test/lightfield_test.sh new file mode 100755 index 0000000000..cf1ea73a84 --- /dev/null +++ b/third_party/aom/test/lightfield_test.sh @@ -0,0 +1,115 @@ +#!/bin/sh +## Copyright (c) 2018, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests the lightfield example. +## +. $(dirname $0)/tools_common.sh + +# Environment check: $infile is required. +lightfield_test_verify_environment() { + local infile="${LIBAOM_TEST_DATA_PATH}/vase10x10.yuv" + if [ ! -e "${infile}" ]; then + echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH." 
+ return 1 + fi +} + +# Run the lightfield example +lightfield_test() { + local img_width=1024 + local img_height=1024 + local lf_width=10 + local lf_height=10 + local lf_blocksize=5 + local num_references=4 + local num_tile_lists=2 + + # Encode the lightfield. + local encoder="${LIBAOM_BIN_PATH}/lightfield_encoder${AOM_TEST_EXE_SUFFIX}" + local yuv_file="${LIBAOM_TEST_DATA_PATH}/vase10x10.yuv" + local lf_file="${AOM_TEST_OUTPUT_DIR}/vase10x10.ivf" + if [ ! -x "${encoder}" ]; then + elog "${encoder} does not exist or is not executable." + return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${encoder}" "${img_width}" "${img_height}" \ + "${yuv_file}" "${lf_file}" "${lf_width}" \ + "${lf_height}" "${lf_blocksize}" ${devnull} || return 1 + + [ -e "${lf_file}" ] || return 1 + + # Check to ensure all camera frames have the identical frame header. If not identical, this test fails. + for i in ./fh*; do + diff ./fh004 $i > /dev/null + if [ $? -eq 1 ]; then + return 1 + fi + done + + # Check to ensure all camera frames use the identical frame context. If not identical, this test fails. + for i in ./fc*; do + diff ./fc004 $i > /dev/null + if [ $? -eq 1 ]; then + return 1 + fi + done + + # Parse lightfield bitstream to construct and output a new bitstream that can + # be decoded by an AV1 decoder. + local bs_decoder="${LIBAOM_BIN_PATH}/lightfield_bitstream_parsing${AOM_TEST_EXE_SUFFIX}" + local tl_file="${AOM_TEST_OUTPUT_DIR}/vase_tile_list.ivf" + local tl_text_file="${LIBAOM_TEST_DATA_PATH}/vase10x10_tiles.txt" + if [ ! -x "${bs_decoder}" ]; then + elog "${bs_decoder} does not exist or is not executable." 
+ return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${bs_decoder}" "${lf_file}" "${tl_file}" \ + "${num_references}" "${tl_text_file}" ${devnull} || return 1 + + [ -e "${tl_file}" ] || return 1 + + # Run lightfield tile list decoder + local tl_decoder="${LIBAOM_BIN_PATH}/lightfield_tile_list_decoder${AOM_TEST_EXE_SUFFIX}" + local tl_outfile="${AOM_TEST_OUTPUT_DIR}/vase_tile_list.yuv" + if [ ! -x "${tl_decoder}" ]; then + elog "${tl_decoder} does not exist or is not executable." + return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${tl_decoder}" "${tl_file}" "${tl_outfile}" \ + "${num_references}" "${num_tile_lists}" ${devnull} || return 1 + + [ -e "${tl_outfile}" ] || return 1 + + # Run reference lightfield decoder + local ref_decoder="${LIBAOM_BIN_PATH}/lightfield_decoder${AOM_TEST_EXE_SUFFIX}" + local tl_reffile="${AOM_TEST_OUTPUT_DIR}/vase_reference.yuv" + if [ ! -x "${ref_decoder}" ]; then + elog "${ref_decoder} does not exist or is not executable." + return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${ref_decoder}" "${lf_file}" "${tl_reffile}" \ + "${num_references}" "${tl_text_file}" ${devnull} || return 1 + + [ -e "${tl_reffile}" ] || return 1 + + # Check if tl_outfile and tl_reffile are identical. If not identical, this test fails. + diff ${tl_outfile} ${tl_reffile} > /dev/null + if [ $? -eq 1 ]; then + return 1 + fi +} + +lightfield_test_tests="lightfield_test" + +run_tests lightfield_test_verify_environment "${lightfield_test_tests}" diff --git a/third_party/aom/test/log2_test.cc b/third_party/aom/test/log2_test.cc new file mode 100644 index 0000000000..71cf8b25d9 --- /dev/null +++ b/third_party/aom/test/log2_test.cc @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <limits.h> +#include <math.h> + +#include "aom_ports/bitops.h" +#include "av1/common/entropymode.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +TEST(Log2Test, GetMsb) { + // Test small numbers exhaustively. + for (unsigned int n = 1; n < 10000; n++) { + EXPECT_EQ(get_msb(n), static_cast<int>(floor(log2(n)))); + } + + // Test every power of 2 and the two adjacent numbers. + for (int exponent = 2; exponent < 32; exponent++) { + const unsigned int power_of_2 = 1U << exponent; + EXPECT_EQ(get_msb(power_of_2 - 1), exponent - 1); + EXPECT_EQ(get_msb(power_of_2), exponent); + EXPECT_EQ(get_msb(power_of_2 + 1), exponent); + } +} + +TEST(Log2Test, Av1CeilLog2) { + // Test small numbers exhaustively. + EXPECT_EQ(av1_ceil_log2(0), 0); + for (int n = 1; n < 10000; n++) { + EXPECT_EQ(av1_ceil_log2(n), static_cast<int>(ceil(log2(n)))); + } + + // Test every power of 2 and the two adjacent numbers. + for (int exponent = 2; exponent < 31; exponent++) { + const int power_of_2 = 1 << exponent; + EXPECT_EQ(av1_ceil_log2(power_of_2 - 1), exponent); + EXPECT_EQ(av1_ceil_log2(power_of_2), exponent); + EXPECT_EQ(av1_ceil_log2(power_of_2 + 1), exponent + 1); + } + + // INT_MAX = 2^31 - 1 + EXPECT_EQ(av1_ceil_log2(INT_MAX), 31); +} diff --git a/third_party/aom/test/loopfilter_control_test.cc b/third_party/aom/test/loopfilter_control_test.cc new file mode 100644 index 0000000000..9c00235e19 --- /dev/null +++ b/third_party/aom/test/loopfilter_control_test.cc @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <memory> +#include <string> +#include <unordered_map> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" + +namespace { + +const unsigned int kFrames = 10; +const int kBitrate = 500; + +// List of psnr thresholds for LF settings 0-3 +// keys: video, LF control, aq mode. +std::unordered_map<std::string, + std::unordered_map<int, std::unordered_map<int, double>>> + kPsnrThreshold = { { "park_joy_90p_8_420.y4m", + { { 0, { { 0, 35.0 }, { 3, 35.8 } } }, + { 1, { { 0, 35.1 }, { 3, 35.9 } } }, + { 2, { { 0, 35.1 }, { 3, 36.1 } } }, + { 3, { { 0, 35.1 }, { 3, 36.1 } } } } }, + { "paris_352_288_30.y4m", + { { 0, { { 0, 35.40 }, { 3, 36.0 } } }, + { 1, { { 0, 35.50 }, { 3, 36.0 } } }, + { 2, { { 0, 35.50 }, { 3, 36.0 } } }, + { 3, { { 0, 35.50 }, { 3, 36.0 } } } } }, + { "niklas_1280_720_30.y4m", + { { 0, { { 0, 33.20 }, { 3, 32.90 } } }, + { 1, { { 0, 33.57 }, { 3, 33.22 } } }, + { 2, { { 0, 33.57 }, { 3, 33.22 } } }, + { 3, { { 0, 33.45 }, { 3, 33.10 } } } } } }; +/* One input clip: file name, input bit depth, image format, codec bit depth and profile. */ +typedef struct { + const char *filename; + unsigned int input_bit_depth; + aom_img_fmt fmt; + aom_bit_depth_t bit_depth; + unsigned int profile; +} TestVideoParam; +/* Writes every field of a TestVideoParam to the stream in a readable form. */ +std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) { + return os << "TestVideoParam { filename:" << test_arg.filename + << " input_bit_depth:" 
<< test_arg.input_bit_depth + << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth + << " profile:" << test_arg.profile << " }"; +} + +const TestVideoParam kTestVectors[] = { + { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 }, + { "paris_352_288_30.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 }, + { "niklas_1280_720_30.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 }, +}; + +// Params: test video, lf_control, aq mode, threads, tile columns. +class LFControlEndToEndTest + : public ::libaom_test::CodecTestWith5Params<TestVideoParam, int, + unsigned int, int, int>, + public ::libaom_test::EncoderTest { + protected: + LFControlEndToEndTest() + : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)), + lf_control_(GET_PARAM(2)), psnr_(0.0), nframes_(0), + aq_mode_(GET_PARAM(3)), threads_(GET_PARAM(4)), + tile_columns_(GET_PARAM(5)) {} + + ~LFControlEndToEndTest() override = default; + /* Real-time configuration with the requested thread count, fixed rate-control buffer sizes and key frame distances of 9999. */ + void SetUp() override { + InitializeConfig(::libaom_test::kRealTime); + + cfg_.g_threads = threads_; + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.kf_max_dist = 9999; + cfg_.kf_min_dist = 9999; + } + + void BeginPassHook(unsigned int) override { + psnr_ = 0.0; + nframes_ = 0; + } + /* Adds psnr[0] of each PSNR packet to the running sum and counts the packet. */ + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + /* Issues all encoder controls once, when the video source reports frame 0. */ + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_ENABLE_RESTORATION, 0); + encoder->Control(AV1E_SET_ENABLE_OBMC, 0); + encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_WARPED_MOTION, 0); + encoder->Control(AV1E_SET_DELTAQ_MODE, 0); + encoder->Control(AV1E_SET_ENABLE_TPL_MODEL, 0); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + encoder->Control(AV1E_SET_TILE_COLUMNS, tile_columns_); + encoder->Control(AOME_SET_CPUUSED, 10); + 
encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT); + encoder->Control(AV1E_SET_AQ_MODE, aq_mode_); + encoder->Control(AV1E_SET_ROW_MT, 1); + encoder->Control(AV1E_SET_ENABLE_CDEF, 1); + encoder->Control(AV1E_SET_COEFF_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_MODE_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_MV_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_DV_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_LOOPFILTER_CONTROL, lf_control_); + } + } + /* Mean of the accumulated psnr[0] values, or 0.0 if no PSNR packet was seen. */ + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + /* Threshold for the current clip, lf_control_ and aq_mode_. The lookup uses operator[], so a combination missing from kPsnrThreshold would be inserted and yield 0.0. */ + double GetPsnrThreshold() { + return kPsnrThreshold[test_video_param_.filename][lf_control_][aq_mode_]; + } + /* Encodes the clip at kBitrate and expects the average PSNR to exceed the table threshold. */ + void DoTest() { + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; + cfg_.g_bit_depth = test_video_param_.bit_depth; + init_flags_ = AOM_CODEC_USE_PSNR; + if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH; + + std::unique_ptr<libaom_test::VideoSource> video; + video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0, + kFrames)); + ASSERT_NE(video, nullptr); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + const double psnr = GetAveragePsnr(); + EXPECT_GT(psnr, GetPsnrThreshold()) + << "loopfilter control = " << lf_control_ << " aq mode = " << aq_mode_; + } + + TestVideoParam test_video_param_; + int lf_control_; + + private: + double psnr_; + unsigned int nframes_; + unsigned int aq_mode_; + int threads_; + int tile_columns_; +}; + +class LFControlEndToEndTestThreaded : public LFControlEndToEndTest {}; + +TEST_P(LFControlEndToEndTest, EndtoEndPSNRTest) { DoTest(); } + +TEST_P(LFControlEndToEndTestThreaded, EndtoEndPSNRTest) { DoTest(); } +/* AOME_GET_LOOPFILTER_LEVEL called with a null output pointer is expected to return AOM_CODEC_INVALID_PARAM. */ +TEST(LFControlGetterTest, NullptrInput) { + int *lf_level = nullptr; + aom_codec_ctx_t encoder; + aom_codec_enc_cfg_t cfg; + aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg, 1); + 
EXPECT_EQ(aom_codec_enc_init(&encoder, aom_codec_av1_cx(), &cfg, 0), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&encoder, AOME_GET_LOOPFILTER_LEVEL, lf_level), + AOM_CODEC_INVALID_PARAM); + EXPECT_EQ(aom_codec_destroy(&encoder), AOM_CODEC_OK); +} +/* Single-threaded suite: every clip, lf_control 0..3, aq mode 0 or 3, one thread, tile-columns value 1. */ +AV1_INSTANTIATE_TEST_SUITE(LFControlEndToEndTest, + ::testing::ValuesIn(kTestVectors), + ::testing::Range(0, 4), + ::testing::Values<unsigned int>(0, 3), + ::testing::Values(1), ::testing::Values(1)); +/* Threaded suite: same clips and modes with 2..4 threads and tile-columns values 2..4. */ +AV1_INSTANTIATE_TEST_SUITE(LFControlEndToEndTestThreaded, + ::testing::ValuesIn(kTestVectors), + ::testing::Range(0, 4), + ::testing::Values<unsigned int>(0, 3), + ::testing::Range(2, 5), ::testing::Range(2, 5)); +} // namespace diff --git a/third_party/aom/test/lossless_test.cc b/third_party/aom/test/lossless_test.cc new file mode 100644 index 0000000000..756ad05019 --- /dev/null +++ b/third_party/aom/test/lossless_test.cc @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" + +namespace { + +const int kMaxPsnr = 100; +/* Params: encoding mode, rate-control end usage, cpu_used. Tracks the minimum frame PSNR and checks the quantizer reported by the decoder. */ +class LosslessTestLarge + : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, + aom_rc_mode, int>, + public ::libaom_test::EncoderTest { + protected: + LosslessTestLarge() + : EncoderTest(GET_PARAM(0)), psnr_(kMaxPsnr), nframes_(0), + encoding_mode_(GET_PARAM(1)), rc_end_usage_(GET_PARAM(2)), + cpu_used_(GET_PARAM(3)) {} + + ~LosslessTestLarge() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + cfg_.rc_end_usage = rc_end_usage_; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + // Only call Control if quantizer > 0 to verify that using quantizer + // alone will activate lossless + if (cfg_.rc_max_quantizer > 0 || cfg_.rc_min_quantizer > 0) { + encoder->Control(AV1E_SET_LOSSLESS, 1); + } + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + } + } + /* Resets the per-pass state. NOTE(review): nframes_ is reset here, but nothing visible in this class increments or reads it. */ + void BeginPassHook(unsigned int /*pass*/) override { + psnr_ = kMaxPsnr; + nframes_ = 0; + } + /* Keeps the lowest psnr[0] reported by any PSNR packet. */ + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + if (pkt->data.psnr.psnr[0] < psnr_) psnr_ = pkt->data.psnr.psnr[0]; + } + + double GetMinPsnr() const { return psnr_; } + /* On a successful decode, queries AOMD_GET_LAST_QUANTIZER and expects 0; returns whether decoding succeeded. */ + bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) override { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + if (AOM_CODEC_OK == res_dec) { + aom_codec_ctx_t *ctx_dec = decoder->GetDecoder(); + AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_LAST_QUANTIZER, + &base_qindex_); + EXPECT_EQ(base_qindex_, 0) + << "Error: Base_qindex is non zero for lossless coding"; + } + return AOM_CODEC_OK == res_dec; + } + + void 
TestLosslessEncoding(); + void TestLosslessEncodingVGALag0(); + void TestLosslessEncoding444(); + void TestLosslessEncodingCtrl(); + + private: + double psnr_; + unsigned int nframes_; + libaom_test::TestMode encoding_mode_; + aom_rc_mode rc_end_usage_; + int cpu_used_; + int base_qindex_; +}; +/* Both quantizer bounds are 0 with a lag of 25 frames; the minimum PSNR must reach kMaxPsnr. */ +void LosslessTestLarge::TestLosslessEncoding() { + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 2000; + cfg_.g_lag_in_frames = 25; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 0; + + init_flags_ = AOM_CODEC_USE_PSNR; + + // intentionally changed the dimension for better testing coverage + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 5); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_lossless = GetMinPsnr(); + EXPECT_GE(psnr_lossless, kMaxPsnr); +} +/* Same check with g_lag_in_frames = 0 on the 640x480 clip. */ +void LosslessTestLarge::TestLosslessEncodingVGALag0() { + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 2000; + cfg_.g_lag_in_frames = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 0; + + init_flags_ = AOM_CODEC_USE_PSNR; + + libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, + timebase.den, timebase.num, 0, 30); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_lossless = GetMinPsnr(); + EXPECT_GE(psnr_lossless, kMaxPsnr); +} +/* Same check on rush_hour_444.y4m with g_profile = 1 and the timebase taken from the clip. */ +void LosslessTestLarge::TestLosslessEncoding444() { + libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 5); + + cfg_.g_profile = 1; + cfg_.g_timebase = video.timebase(); + cfg_.rc_target_bitrate = 2000; + cfg_.g_lag_in_frames = 25; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 0; + + init_flags_ = AOM_CODEC_USE_PSNR; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_lossless = GetMinPsnr(); + EXPECT_GE(psnr_lossless, kMaxPsnr); +} +/* Quantizer bounds are non-zero here, so PreEncodeFrameHook issues AV1E_SET_LOSSLESS; the minimum PSNR must still reach kMaxPsnr. */ +void LosslessTestLarge::TestLosslessEncodingCtrl() { + const 
aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 2000; + cfg_.g_lag_in_frames = 25; + // Intentionally set Q > 0, to make sure control can be used to activate + // lossless + cfg_.rc_min_quantizer = 10; + cfg_.rc_max_quantizer = 20; + + init_flags_ = AOM_CODEC_USE_PSNR; + + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 5); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_lossless = GetMinPsnr(); + EXPECT_GE(psnr_lossless, kMaxPsnr); +} + +TEST_P(LosslessTestLarge, TestLosslessEncoding) { TestLosslessEncoding(); } + +TEST_P(LosslessTestLarge, TestLosslessEncodingVGALag0) { + TestLosslessEncodingVGALag0(); +} + +TEST_P(LosslessTestLarge, TestLosslessEncoding444) { + TestLosslessEncoding444(); +} + +TEST_P(LosslessTestLarge, TestLosslessEncodingCtrl) { + TestLosslessEncodingCtrl(); +} +/* All-intra variant; its single test sets only the timebase and the quantizer bounds before encoding. */ +class LosslessAllIntraTestLarge : public LosslessTestLarge {}; + +TEST_P(LosslessAllIntraTestLarge, TestLosslessEncodingCtrl) { + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + // Intentionally set Q > 0, to make sure control can be used to activate + // lossless + cfg_.rc_min_quantizer = 10; + cfg_.rc_max_quantizer = 20; + + init_flags_ = AOM_CODEC_USE_PSNR; + + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + timebase.den, timebase.num, 0, 5); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_lossless = GetMinPsnr(); + EXPECT_GE(psnr_lossless, kMaxPsnr); +} +/* Real-time suite: an alias of LosslessTestLarge that runs the same four test bodies. */ +using LosslessRealtimeTestLarge = LosslessTestLarge; + +TEST_P(LosslessRealtimeTestLarge, TestLosslessEncoding) { + TestLosslessEncoding(); +} + +TEST_P(LosslessRealtimeTestLarge, TestLosslessEncodingVGALag0) { + TestLosslessEncodingVGALag0(); +} + +TEST_P(LosslessRealtimeTestLarge, TestLosslessEncoding444) { + TestLosslessEncoding444(); +} + +TEST_P(LosslessRealtimeTestLarge, TestLosslessEncodingCtrl) { + 
TestLosslessEncodingCtrl(); +} +/* One-pass and two-pass good-quality modes, all four rate-control modes, cpu_used 0. */ +AV1_INSTANTIATE_TEST_SUITE(LosslessTestLarge, + ::testing::Values(::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood), + ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ), + ::testing::Values(0)); // cpu_used + +AV1_INSTANTIATE_TEST_SUITE(LosslessAllIntraTestLarge, + ::testing::Values(::libaom_test::kAllIntra), + ::testing::Values(AOM_Q), + ::testing::Values(6, 9)); // cpu_used +/* Real-time mode, all four rate-control modes, cpu_used 6..10. */ +AV1_INSTANTIATE_TEST_SUITE(LosslessRealtimeTestLarge, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ), + ::testing::Range(6, 11)); // cpu_used +} // namespace diff --git a/third_party/aom/test/lpf_test.cc b/third_party/aom/test/lpf_test.cc new file mode 100644 index 0000000000..04b1c86d4d --- /dev/null +++ b/third_party/aom/test/lpf_test.cc @@ -0,0 +1,824 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <cmath> +#include <cstdlib> +#include <string> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/av1_loopfilter.h" +#include "av1/common/entropy.h" +#include "aom/aom_integer.h" + +using libaom_test::ACMRandom; + +namespace { +// Horizontally and Vertically need 32x32: 8 Coeffs preceding filtered section +// 16 Coeffs within filtered section +// 8 Coeffs following filtered section +const int kNumCoeffs = 1024; + +const int number_of_iterations = 10000; + +const int kSpeedTestNum = 500000; + +#define LOOP_PARAM \ + int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh +#define DUAL_LOOP_PARAM \ + int p, const uint8_t *blimit0, const uint8_t *limit0, \ + const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, \ + const uint8_t *thresh1 + +typedef void (*loop_op_t)(uint8_t *s, LOOP_PARAM); +typedef void (*dual_loop_op_t)(uint8_t *s, DUAL_LOOP_PARAM); +typedef void (*hbdloop_op_t)(uint16_t *s, LOOP_PARAM, int bd); +typedef void (*hbddual_loop_op_t)(uint16_t *s, DUAL_LOOP_PARAM, int bd); + +typedef std::tuple<hbdloop_op_t, hbdloop_op_t, int> hbdloop_param_t; +typedef std::tuple<hbddual_loop_op_t, hbddual_loop_op_t, int> + hbddual_loop_param_t; +typedef std::tuple<loop_op_t, loop_op_t, int> loop_param_t; +typedef std::tuple<dual_loop_op_t, dual_loop_op_t, int> dual_loop_param_t; + +template <typename Pixel_t, int PIXEL_WIDTH_t> +void InitInput(Pixel_t *s, Pixel_t *ref_s, ACMRandom *rnd, const uint8_t limit, + const int mask, const int32_t p, const int i) { + uint16_t tmp_s[kNumCoeffs]; + + for (int j = 0; j < kNumCoeffs;) { + const uint8_t val = rnd->Rand8(); + if (val & 0x80) { // 50% chance to choose a new value. 
+ tmp_s[j] = rnd->Rand16(); + j++; + } else { // 50% chance to repeat previous value in row X times. + int k = 0; + while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) { + if (j < 1) { + tmp_s[j] = rnd->Rand16(); + } else if (val & 0x20) { // Increment by a value within the limit. + tmp_s[j] = static_cast<uint16_t>(tmp_s[j - 1] + (limit - 1)); + } else { // Decrement by a value within the limit. + tmp_s[j] = static_cast<uint16_t>(tmp_s[j - 1] - (limit - 1)); + } + j++; + } + } + } + + for (int j = 0; j < kNumCoeffs;) { + const uint8_t val = rnd->Rand8(); + if (val & 0x80) { + j++; + } else { // 50% chance to repeat previous value in column X times. + int k = 0; + while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) { + if (j < 1) { + tmp_s[j] = rnd->Rand16(); + } else if (val & 0x20) { // Increment by a value within the limit. + tmp_s[(j % 32) * 32 + j / 32] = static_cast<uint16_t>( + tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] + (limit - 1)); + } else { // Decrement by a value within the limit. 
+ tmp_s[(j % 32) * 32 + j / 32] = static_cast<uint16_t>( + tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] - (limit - 1)); + } + j++; + } + } + } + + for (int j = 0; j < kNumCoeffs; j++) { + if (i % 2) { + s[j] = tmp_s[j] & mask; + } else { + s[j] = tmp_s[p * (j % p) + j / p] & mask; + } + ref_s[j] = s[j]; + } +} + +uint8_t GetOuterThresh(ACMRandom *rnd) { + return static_cast<uint8_t>(rnd->PseudoUniform(3 * MAX_LOOP_FILTER + 5)); +} + +uint8_t GetInnerThresh(ACMRandom *rnd) { + return static_cast<uint8_t>(rnd->PseudoUniform(MAX_LOOP_FILTER + 1)); +} + +uint8_t GetHevThresh(ACMRandom *rnd) { + return static_cast<uint8_t>(rnd->PseudoUniform(MAX_LOOP_FILTER + 1) >> 4); +} + +template <typename func_type_t, typename params_t> +class LoopTestParam : public ::testing::TestWithParam<params_t> { + public: + ~LoopTestParam() override = default; + void SetUp() override { + loopfilter_op_ = std::get<0>(this->GetParam()); + ref_loopfilter_op_ = std::get<1>(this->GetParam()); + bit_depth_ = std::get<2>(this->GetParam()); + mask_ = (1 << bit_depth_) - 1; + } + + protected: + int bit_depth_; + int mask_; + func_type_t loopfilter_op_; + func_type_t ref_loopfilter_op_; +}; + +#if CONFIG_AV1_HIGHBITDEPTH +void call_filter(uint16_t *s, LOOP_PARAM, int bd, hbdloop_op_t op) { + op(s, p, blimit, limit, thresh, bd); +} +void call_dualfilter(uint16_t *s, DUAL_LOOP_PARAM, int bd, + hbddual_loop_op_t op) { + op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bd); +} +#endif +void call_filter(uint8_t *s, LOOP_PARAM, int bd, loop_op_t op) { + (void)bd; + op(s, p, blimit, limit, thresh); +} +void call_dualfilter(uint8_t *s, DUAL_LOOP_PARAM, int bd, dual_loop_op_t op) { + (void)bd; + op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1); +} + +#if CONFIG_AV1_HIGHBITDEPTH +typedef LoopTestParam<hbdloop_op_t, hbdloop_param_t> Loop8Test6Param_hbd; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Loop8Test6Param_hbd); +typedef LoopTestParam<hbddual_loop_op_t, hbddual_loop_param_t> + 
Loop8Test9Param_hbd; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Loop8Test9Param_hbd); +#endif +typedef LoopTestParam<loop_op_t, loop_param_t> Loop8Test6Param_lbd; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Loop8Test6Param_lbd); +typedef LoopTestParam<dual_loop_op_t, dual_loop_param_t> Loop8Test9Param_lbd; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Loop8Test9Param_lbd); + +#define OPCHECK(a, b) \ + do { \ + ACMRandom rnd(ACMRandom::DeterministicSeed()); \ + const int count_test_block = number_of_iterations; \ + const int32_t p = kNumCoeffs / 32; \ + DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \ + DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]); \ + int err_count_total = 0; \ + int first_failure = -1; \ + for (int i = 0; i < count_test_block; ++i) { \ + int err_count = 0; \ + uint8_t tmp = GetOuterThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = { tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + tmp = GetInnerThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, \ + limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \ + tmp = GetHevThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, thresh[16]) = { tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + InitInput<a, b>(s, ref_s, &rnd, *limit, mask_, p, i); \ + call_filter(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_, \ + ref_loopfilter_op_); \ + API_REGISTER_STATE_CHECK(call_filter(s + 8 + p * 8, p, blimit, limit, \ + thresh, bit_depth_, \ + loopfilter_op_)); \ + for (int j = 0; j < kNumCoeffs; ++j) { \ + err_count += ref_s[j] != s[j]; \ + } \ + if (err_count && !err_count_total) { \ + first_failure = i; \ + } \ + err_count_total += err_count; \ + } \ + EXPECT_EQ(0, err_count_total) \ + << "Error: Loop8Test6Param, C output doesn't match SIMD " \ + "loopfilter output. 
" \ + << "First failed at test case " << first_failure; \ + } while (false) + +#if CONFIG_AV1_HIGHBITDEPTH +TEST_P(Loop8Test6Param_hbd, OperationCheck) { OPCHECK(uint16_t, 16); } +#endif +TEST_P(Loop8Test6Param_lbd, OperationCheck) { OPCHECK(uint8_t, 8); } + +#define VALCHECK(a, b) \ + do { \ + ACMRandom rnd(ACMRandom::DeterministicSeed()); \ + const int count_test_block = number_of_iterations; \ + DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \ + DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]); \ + int err_count_total = 0; \ + int first_failure = -1; \ + for (int i = 0; i < count_test_block; ++i) { \ + int err_count = 0; \ + uint8_t tmp = GetOuterThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = { tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + tmp = GetInnerThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, \ + limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \ + tmp = GetHevThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, thresh[16]) = { tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + int32_t p = kNumCoeffs / 32; \ + for (int j = 0; j < kNumCoeffs; ++j) { \ + s[j] = rnd.Rand16() & mask_; \ + ref_s[j] = s[j]; \ + } \ + call_filter(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_, \ + ref_loopfilter_op_); \ + API_REGISTER_STATE_CHECK(call_filter(s + 8 + p * 8, p, blimit, limit, \ + thresh, bit_depth_, \ + loopfilter_op_)); \ + for (int j = 0; j < kNumCoeffs; ++j) { \ + err_count += ref_s[j] != s[j]; \ + } \ + if (err_count && !err_count_total) { \ + first_failure = i; \ + } \ + err_count_total += err_count; \ + } \ + EXPECT_EQ(0, err_count_total) \ + << "Error: Loop8Test6Param, C output doesn't match SIMD " \ + "loopfilter output. 
" \ + << "First failed at test case " << first_failure; \ + } while (false) + +#if CONFIG_AV1_HIGHBITDEPTH +TEST_P(Loop8Test6Param_hbd, ValueCheck) { VALCHECK(uint16_t, 16); } +#endif +TEST_P(Loop8Test6Param_lbd, ValueCheck) { VALCHECK(uint8_t, 8); } + +#define SPEEDCHECK(a, b) \ + do { \ + ACMRandom rnd(ACMRandom::DeterministicSeed()); \ + const int count_test_block = kSpeedTestNum; \ + const int32_t bd = bit_depth_; \ + DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \ + uint8_t tmp = GetOuterThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, \ + blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \ + tmp = GetInnerThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, \ + limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \ + tmp = GetHevThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, \ + thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \ + int32_t p = kNumCoeffs / 32; \ + for (int j = 0; j < kNumCoeffs; ++j) { \ + s[j] = rnd.Rand16() & mask_; \ + } \ + for (int i = 0; i < count_test_block; ++i) { \ + call_filter(s + 8 + p * 8, p, blimit, limit, thresh, bd, \ + loopfilter_op_); \ + } \ + } while (false) + +#if CONFIG_AV1_HIGHBITDEPTH +TEST_P(Loop8Test6Param_hbd, DISABLED_Speed) { SPEEDCHECK(uint16_t, 16); } +#endif +TEST_P(Loop8Test6Param_lbd, DISABLED_Speed) { SPEEDCHECK(uint8_t, 8); } + +#define OPCHECKd(a, b) \ + do { \ + ACMRandom rnd(ACMRandom::DeterministicSeed()); \ + const int count_test_block = number_of_iterations; \ + DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \ + DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]); \ + int err_count_total = 0; \ + int first_failure = -1; \ + for (int i = 0; i < count_test_block; ++i) { \ + int err_count = 0; \ + uint8_t tmp = GetOuterThresh(&rnd); \ + DECLARE_ALIGNED( \ + 16, const uint8_t, blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, 
tmp, tmp }; \ + tmp = GetInnerThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, limit0[16]) = { tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + tmp = GetHevThresh(&rnd); \ + DECLARE_ALIGNED( \ + 16, const uint8_t, thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + tmp = GetOuterThresh(&rnd); \ + DECLARE_ALIGNED( \ + 16, const uint8_t, blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + tmp = GetInnerThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, limit1[16]) = { tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + tmp = GetHevThresh(&rnd); \ + DECLARE_ALIGNED( \ + 16, const uint8_t, thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + int32_t p = kNumCoeffs / 32; \ + const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1; \ + InitInput<a, b>(s, ref_s, &rnd, limit, mask_, p, i); \ + call_dualfilter(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \ + limit1, thresh1, bit_depth_, ref_loopfilter_op_); \ + API_REGISTER_STATE_CHECK( \ + call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \ + limit1, thresh1, bit_depth_, loopfilter_op_)); \ + for (int j = 0; j < kNumCoeffs; ++j) { \ + err_count += ref_s[j] != s[j]; \ + } \ + if (err_count && !err_count_total) { \ + first_failure = i; \ + } \ + err_count_total += err_count; \ + } \ + EXPECT_EQ(0, err_count_total) \ + << "Error: Loop8Test9Param, C output doesn't match SIMD " \ + "loopfilter output. 
" \ + << "First failed at test case " << first_failure; \ + } while (false) + +#if CONFIG_AV1_HIGHBITDEPTH +TEST_P(Loop8Test9Param_hbd, OperationCheck) { OPCHECKd(uint16_t, 16); } +#endif +TEST_P(Loop8Test9Param_lbd, OperationCheck) { OPCHECKd(uint8_t, 8); } + +#define VALCHECKd(a, b) \ + do { \ + ACMRandom rnd(ACMRandom::DeterministicSeed()); \ + const int count_test_block = number_of_iterations; \ + DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \ + DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]); \ + int err_count_total = 0; \ + int first_failure = -1; \ + for (int i = 0; i < count_test_block; ++i) { \ + int err_count = 0; \ + uint8_t tmp = GetOuterThresh(&rnd); \ + DECLARE_ALIGNED( \ + 16, const uint8_t, blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + tmp = GetInnerThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, limit0[16]) = { tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + tmp = GetHevThresh(&rnd); \ + DECLARE_ALIGNED( \ + 16, const uint8_t, thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + tmp = GetOuterThresh(&rnd); \ + DECLARE_ALIGNED( \ + 16, const uint8_t, blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + tmp = GetInnerThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, limit1[16]) = { tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + tmp = GetHevThresh(&rnd); \ + DECLARE_ALIGNED( \ + 16, const uint8_t, thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp }; \ + int32_t p = kNumCoeffs / 32; \ + for (int j = 0; j < kNumCoeffs; ++j) { \ + s[j] = rnd.Rand16() & mask_; \ + ref_s[j] = s[j]; \ + } \ + call_dualfilter(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \ + limit1, thresh1, bit_depth_, ref_loopfilter_op_); \ + 
API_REGISTER_STATE_CHECK( \ + call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \ + limit1, thresh1, bit_depth_, loopfilter_op_)); \ + for (int j = 0; j < kNumCoeffs; ++j) { \ + err_count += ref_s[j] != s[j]; \ + } \ + if (err_count && !err_count_total) { \ + first_failure = i; \ + } \ + err_count_total += err_count; \ + } \ + EXPECT_EQ(0, err_count_total) \ + << "Error: Loop8Test9Param, C output doesn't match SIMD " \ + "loopfilter output. " \ + << "First failed at test case " << first_failure; \ + } while (false) + +#if CONFIG_AV1_HIGHBITDEPTH +TEST_P(Loop8Test9Param_hbd, ValueCheck) { VALCHECKd(uint16_t, 16); } +#endif +TEST_P(Loop8Test9Param_lbd, ValueCheck) { VALCHECKd(uint8_t, 8); } + +#define SPEEDCHECKd(a, b) \ + do { \ + ACMRandom rnd(ACMRandom::DeterministicSeed()); \ + const int count_test_block = kSpeedTestNum; \ + DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \ + uint8_t tmp = GetOuterThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, \ + blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \ + tmp = GetInnerThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, \ + limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \ + tmp = GetHevThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, \ + thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \ + tmp = GetOuterThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, \ + blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \ + tmp = GetInnerThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, \ + limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \ + tmp = GetHevThresh(&rnd); \ + DECLARE_ALIGNED(16, const uint8_t, \ + thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \ + tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \ + int32_t p = kNumCoeffs / 32; 
\ + for (int j = 0; j < kNumCoeffs; ++j) { \ + s[j] = rnd.Rand16() & mask_; \ + } \ + for (int i = 0; i < count_test_block; ++i) { \ + call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \ + limit1, thresh1, bit_depth_, loopfilter_op_); \ + } \ + } while (false) + +#if CONFIG_AV1_HIGHBITDEPTH +TEST_P(Loop8Test9Param_hbd, DISABLED_Speed) { SPEEDCHECKd(uint16_t, 16); } +#endif +TEST_P(Loop8Test9Param_lbd, DISABLED_Speed) { SPEEDCHECKd(uint8_t, 8); } + +using std::make_tuple; + +#if HAVE_SSE2 +#if CONFIG_AV1_HIGHBITDEPTH +const hbdloop_param_t kHbdLoop8Test6[] = { + make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c, + 8), + make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c, + 8), + make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c, + 8), + make_tuple(&aom_highbd_lpf_horizontal_14_sse2, + &aom_highbd_lpf_horizontal_14_c, 8), + make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 8), + make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 8), + + make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c, + 8), + make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c, + 10), + make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c, + 10), + make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c, + 10), + make_tuple(&aom_highbd_lpf_horizontal_14_sse2, + &aom_highbd_lpf_horizontal_14_c, 10), + make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 10), + make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 10), + make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c, + 10), + make_tuple(&aom_highbd_lpf_horizontal_4_sse2, 
&aom_highbd_lpf_horizontal_4_c, + 12), + make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c, + 12), + make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c, + 12), + make_tuple(&aom_highbd_lpf_horizontal_14_sse2, + &aom_highbd_lpf_horizontal_14_c, 12), + make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c, + 12), + make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 12), + make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 12) +}; + +INSTANTIATE_TEST_SUITE_P(SSE2, Loop8Test6Param_hbd, + ::testing::ValuesIn(kHbdLoop8Test6)); +#endif // CONFIG_AV1_HIGHBITDEPTH + +const loop_param_t kLoop8Test6[] = { + make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8), + make_tuple(&aom_lpf_horizontal_8_sse2, &aom_lpf_horizontal_8_c, 8), + make_tuple(&aom_lpf_horizontal_6_sse2, &aom_lpf_horizontal_6_c, 8), + make_tuple(&aom_lpf_vertical_6_sse2, &aom_lpf_vertical_6_c, 8), + make_tuple(&aom_lpf_horizontal_14_sse2, &aom_lpf_horizontal_14_c, 8), + make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8), + make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8), + make_tuple(&aom_lpf_vertical_14_sse2, &aom_lpf_vertical_14_c, 8), + make_tuple(&aom_lpf_horizontal_4_quad_sse2, &aom_lpf_horizontal_4_quad_c, 8), + make_tuple(&aom_lpf_vertical_4_quad_sse2, &aom_lpf_vertical_4_quad_c, 8), + make_tuple(&aom_lpf_horizontal_6_quad_sse2, &aom_lpf_horizontal_6_quad_c, 8), + make_tuple(&aom_lpf_vertical_6_quad_sse2, &aom_lpf_vertical_6_quad_c, 8), + make_tuple(&aom_lpf_horizontal_8_quad_sse2, &aom_lpf_horizontal_8_quad_c, 8), + make_tuple(&aom_lpf_vertical_8_quad_sse2, &aom_lpf_vertical_8_quad_c, 8), + make_tuple(&aom_lpf_horizontal_14_quad_sse2, &aom_lpf_horizontal_14_quad_c, + 8), + make_tuple(&aom_lpf_vertical_14_quad_sse2, &aom_lpf_vertical_14_quad_c, 8) +}; + 
+INSTANTIATE_TEST_SUITE_P(SSE2, Loop8Test6Param_lbd, + ::testing::ValuesIn(kLoop8Test6)); + +const dual_loop_param_t kLoop8Test9[] = { + make_tuple(&aom_lpf_horizontal_4_dual_sse2, &aom_lpf_horizontal_4_dual_c, 8), + make_tuple(&aom_lpf_vertical_4_dual_sse2, &aom_lpf_vertical_4_dual_c, 8), + make_tuple(&aom_lpf_horizontal_6_dual_sse2, &aom_lpf_horizontal_6_dual_c, 8), + make_tuple(&aom_lpf_vertical_6_dual_sse2, &aom_lpf_vertical_6_dual_c, 8), + make_tuple(&aom_lpf_horizontal_8_dual_sse2, &aom_lpf_horizontal_8_dual_c, 8), + make_tuple(&aom_lpf_vertical_8_dual_sse2, &aom_lpf_vertical_8_dual_c, 8), + make_tuple(&aom_lpf_horizontal_14_dual_sse2, &aom_lpf_horizontal_14_dual_c, + 8), + make_tuple(&aom_lpf_vertical_14_dual_sse2, &aom_lpf_vertical_14_dual_c, 8) +}; + +INSTANTIATE_TEST_SUITE_P(SSE2, Loop8Test9Param_lbd, + ::testing::ValuesIn(kLoop8Test9)); + +#endif // HAVE_SSE2 + +#if HAVE_AVX2 +const loop_param_t kLoop8Test6Avx2[] = { + make_tuple(&aom_lpf_horizontal_6_quad_avx2, &aom_lpf_horizontal_6_quad_c, 8), + make_tuple(&aom_lpf_horizontal_8_quad_avx2, &aom_lpf_horizontal_8_quad_c, 8), + make_tuple(&aom_lpf_horizontal_14_quad_avx2, &aom_lpf_horizontal_14_quad_c, + 8), + make_tuple(&aom_lpf_vertical_14_quad_avx2, &aom_lpf_vertical_14_quad_c, 8), +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, Loop8Test6Param_lbd, + ::testing::ValuesIn(kLoop8Test6Avx2)); +#endif + +#if HAVE_SSE2 && CONFIG_AV1_HIGHBITDEPTH +const hbddual_loop_param_t kHbdLoop8Test9[] = { + make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2, + &aom_highbd_lpf_horizontal_4_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2, + &aom_highbd_lpf_horizontal_6_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2, + &aom_highbd_lpf_horizontal_8_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2, + &aom_highbd_lpf_horizontal_14_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2, + &aom_highbd_lpf_vertical_4_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2, + 
&aom_highbd_lpf_vertical_6_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2, + &aom_highbd_lpf_vertical_8_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2, + &aom_highbd_lpf_vertical_14_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2, + &aom_highbd_lpf_horizontal_4_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2, + &aom_highbd_lpf_horizontal_6_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2, + &aom_highbd_lpf_horizontal_8_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2, + &aom_highbd_lpf_horizontal_14_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2, + &aom_highbd_lpf_vertical_4_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2, + &aom_highbd_lpf_vertical_6_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2, + &aom_highbd_lpf_vertical_8_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2, + &aom_highbd_lpf_vertical_14_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2, + &aom_highbd_lpf_horizontal_4_dual_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2, + &aom_highbd_lpf_horizontal_6_dual_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2, + &aom_highbd_lpf_horizontal_8_dual_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2, + &aom_highbd_lpf_horizontal_14_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2, + &aom_highbd_lpf_vertical_4_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2, + &aom_highbd_lpf_vertical_6_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2, + &aom_highbd_lpf_vertical_8_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2, + &aom_highbd_lpf_vertical_14_dual_c, 12), +}; + +INSTANTIATE_TEST_SUITE_P(SSE2, Loop8Test9Param_hbd, + ::testing::ValuesIn(kHbdLoop8Test9)); + +#endif // HAVE_SSE2 && CONFIG_AV1_HIGHBITDEPTH + +#if HAVE_NEON +const loop_param_t kLoop8Test6[] = { + make_tuple(&aom_lpf_vertical_14_neon, 
&aom_lpf_vertical_14_c, 8), + make_tuple(&aom_lpf_vertical_8_neon, &aom_lpf_vertical_8_c, 8), + make_tuple(&aom_lpf_vertical_6_neon, &aom_lpf_vertical_6_c, 8), + make_tuple(&aom_lpf_vertical_4_neon, &aom_lpf_vertical_4_c, 8), + make_tuple(&aom_lpf_horizontal_14_neon, &aom_lpf_horizontal_14_c, 8), + make_tuple(&aom_lpf_horizontal_8_neon, &aom_lpf_horizontal_8_c, 8), + make_tuple(&aom_lpf_horizontal_6_neon, &aom_lpf_horizontal_6_c, 8), + make_tuple(&aom_lpf_horizontal_4_neon, &aom_lpf_horizontal_4_c, 8), + make_tuple(&aom_lpf_horizontal_4_quad_neon, &aom_lpf_horizontal_4_quad_c, 8), + make_tuple(&aom_lpf_vertical_4_quad_neon, &aom_lpf_vertical_4_quad_c, 8), + make_tuple(&aom_lpf_horizontal_6_quad_neon, &aom_lpf_horizontal_6_quad_c, 8), + make_tuple(&aom_lpf_vertical_6_quad_neon, &aom_lpf_vertical_6_quad_c, 8), + make_tuple(&aom_lpf_horizontal_8_quad_neon, &aom_lpf_horizontal_8_quad_c, 8), + make_tuple(&aom_lpf_vertical_8_quad_neon, &aom_lpf_vertical_8_quad_c, 8), + make_tuple(&aom_lpf_horizontal_14_quad_neon, &aom_lpf_horizontal_14_quad_c, + 8), + make_tuple(&aom_lpf_vertical_14_quad_neon, &aom_lpf_vertical_14_quad_c, 8) +}; + +INSTANTIATE_TEST_SUITE_P(NEON, Loop8Test6Param_lbd, + ::testing::ValuesIn(kLoop8Test6)); + +const dual_loop_param_t kLoop8Test9[] = { + make_tuple(&aom_lpf_horizontal_4_dual_neon, &aom_lpf_horizontal_4_dual_c, 8), + make_tuple(&aom_lpf_horizontal_6_dual_neon, &aom_lpf_horizontal_6_dual_c, 8), + make_tuple(&aom_lpf_horizontal_8_dual_neon, &aom_lpf_horizontal_8_dual_c, 8), + make_tuple(&aom_lpf_horizontal_14_dual_neon, &aom_lpf_horizontal_14_dual_c, + 8), + make_tuple(&aom_lpf_vertical_4_dual_neon, &aom_lpf_vertical_4_dual_c, 8), + make_tuple(&aom_lpf_vertical_6_dual_neon, &aom_lpf_vertical_6_dual_c, 8), + make_tuple(&aom_lpf_vertical_8_dual_neon, &aom_lpf_vertical_8_dual_c, 8), + make_tuple(&aom_lpf_vertical_14_dual_neon, &aom_lpf_vertical_14_dual_c, 8) +}; + +INSTANTIATE_TEST_SUITE_P(NEON, Loop8Test9Param_lbd, + 
::testing::ValuesIn(kLoop8Test9)); +#if CONFIG_AV1_HIGHBITDEPTH +const hbdloop_param_t kHbdLoop8Test6[] = { + make_tuple(&aom_highbd_lpf_horizontal_4_neon, &aom_highbd_lpf_horizontal_4_c, + 8), + make_tuple(&aom_highbd_lpf_horizontal_4_neon, &aom_highbd_lpf_horizontal_4_c, + 10), + make_tuple(&aom_highbd_lpf_horizontal_4_neon, &aom_highbd_lpf_horizontal_4_c, + 12), + make_tuple(&aom_highbd_lpf_horizontal_6_neon, &aom_highbd_lpf_horizontal_6_c, + 8), + make_tuple(&aom_highbd_lpf_horizontal_6_neon, &aom_highbd_lpf_horizontal_6_c, + 10), + make_tuple(&aom_highbd_lpf_horizontal_6_neon, &aom_highbd_lpf_horizontal_6_c, + 12), + make_tuple(&aom_highbd_lpf_horizontal_8_neon, &aom_highbd_lpf_horizontal_8_c, + 8), + make_tuple(&aom_highbd_lpf_horizontal_8_neon, &aom_highbd_lpf_horizontal_8_c, + 10), + make_tuple(&aom_highbd_lpf_horizontal_8_neon, &aom_highbd_lpf_horizontal_8_c, + 12), + make_tuple(&aom_highbd_lpf_horizontal_14_neon, + &aom_highbd_lpf_horizontal_14_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_14_neon, + &aom_highbd_lpf_horizontal_14_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_14_neon, + &aom_highbd_lpf_horizontal_14_c, 12), + make_tuple(&aom_highbd_lpf_vertical_4_neon, &aom_highbd_lpf_vertical_4_c, 8), + make_tuple(&aom_highbd_lpf_vertical_4_neon, &aom_highbd_lpf_vertical_4_c, 10), + make_tuple(&aom_highbd_lpf_vertical_4_neon, &aom_highbd_lpf_vertical_4_c, 12), + make_tuple(&aom_highbd_lpf_vertical_6_neon, &aom_highbd_lpf_vertical_6_c, 8), + make_tuple(&aom_highbd_lpf_vertical_6_neon, &aom_highbd_lpf_vertical_6_c, 10), + make_tuple(&aom_highbd_lpf_vertical_6_neon, &aom_highbd_lpf_vertical_6_c, 12), + make_tuple(&aom_highbd_lpf_vertical_8_neon, &aom_highbd_lpf_vertical_8_c, 8), + make_tuple(&aom_highbd_lpf_vertical_8_neon, &aom_highbd_lpf_vertical_8_c, 10), + make_tuple(&aom_highbd_lpf_vertical_8_neon, &aom_highbd_lpf_vertical_8_c, 12), + make_tuple(&aom_highbd_lpf_vertical_14_neon, &aom_highbd_lpf_vertical_14_c, + 8), + 
make_tuple(&aom_highbd_lpf_vertical_14_neon, &aom_highbd_lpf_vertical_14_c, + 10), + make_tuple(&aom_highbd_lpf_vertical_14_neon, &aom_highbd_lpf_vertical_14_c, + 12), +}; + +INSTANTIATE_TEST_SUITE_P(NEON, Loop8Test6Param_hbd, + ::testing::ValuesIn(kHbdLoop8Test6)); + +const hbddual_loop_param_t kHbdLoop8Test9[] = { + make_tuple(&aom_highbd_lpf_horizontal_4_dual_neon, + &aom_highbd_lpf_horizontal_4_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_6_dual_neon, + &aom_highbd_lpf_horizontal_6_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_neon, + &aom_highbd_lpf_horizontal_8_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_14_dual_neon, + &aom_highbd_lpf_horizontal_14_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_4_dual_neon, + &aom_highbd_lpf_vertical_4_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_6_dual_neon, + &aom_highbd_lpf_vertical_6_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_8_dual_neon, + &aom_highbd_lpf_vertical_8_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_14_dual_neon, + &aom_highbd_lpf_vertical_14_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_4_dual_neon, + &aom_highbd_lpf_horizontal_4_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_6_dual_neon, + &aom_highbd_lpf_horizontal_6_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_neon, + &aom_highbd_lpf_horizontal_8_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_14_dual_neon, + &aom_highbd_lpf_horizontal_14_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_4_dual_neon, + &aom_highbd_lpf_vertical_4_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_6_dual_neon, + &aom_highbd_lpf_vertical_6_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_8_dual_neon, + &aom_highbd_lpf_vertical_8_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_14_dual_neon, + &aom_highbd_lpf_vertical_14_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_4_dual_neon, + &aom_highbd_lpf_horizontal_4_dual_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_6_dual_neon, + 
&aom_highbd_lpf_horizontal_6_dual_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_neon, + &aom_highbd_lpf_horizontal_8_dual_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_14_dual_neon, + &aom_highbd_lpf_horizontal_14_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_4_dual_neon, + &aom_highbd_lpf_vertical_4_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_6_dual_neon, + &aom_highbd_lpf_vertical_6_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_8_dual_neon, + &aom_highbd_lpf_vertical_8_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_14_dual_neon, + &aom_highbd_lpf_vertical_14_dual_c, 12), +}; + +INSTANTIATE_TEST_SUITE_P(NEON, Loop8Test9Param_hbd, + ::testing::ValuesIn(kHbdLoop8Test9)); + +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_NEON + +#if HAVE_AVX2 && CONFIG_AV1_HIGHBITDEPTH +const hbddual_loop_param_t kHbdLoop8Test9Avx2[] = { + make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2, + &aom_highbd_lpf_horizontal_4_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2, + &aom_highbd_lpf_horizontal_4_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2, + &aom_highbd_lpf_horizontal_4_dual_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2, + &aom_highbd_lpf_horizontal_8_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2, + &aom_highbd_lpf_horizontal_8_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2, + &aom_highbd_lpf_horizontal_8_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2, + &aom_highbd_lpf_vertical_4_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2, + &aom_highbd_lpf_vertical_4_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2, + &aom_highbd_lpf_vertical_4_dual_c, 12), + make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2, + &aom_highbd_lpf_vertical_8_dual_c, 8), + make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2, + &aom_highbd_lpf_vertical_8_dual_c, 10), + make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2, + &aom_highbd_lpf_vertical_8_dual_c, 
12), +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, Loop8Test9Param_hbd, + ::testing::ValuesIn(kHbdLoop8Test9Avx2)); +#endif +} // namespace diff --git a/third_party/aom/test/masked_sad_test.cc b/third_party/aom/test/masked_sad_test.cc new file mode 100644 index 0000000000..bb037460d1 --- /dev/null +++ b/third_party/aom/test/masked_sad_test.cc @@ -0,0 +1,617 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom/aom_integer.h" + +using libaom_test::ACMRandom; + +namespace { +const int number_of_iterations = 200; + +typedef unsigned int (*MaskedSADFunc)(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + const uint8_t *second_pred, + const uint8_t *msk, int msk_stride, + int invert_mask); +typedef std::tuple<MaskedSADFunc, MaskedSADFunc> MaskedSADParam; + +typedef void (*MaskedSADx4Func)(const uint8_t *src, int src_stride, + const uint8_t *ref[], int ref_stride, + const uint8_t *second_pred, const uint8_t *msk, + int msk_stride, int invert_mask, + unsigned sads[]); + +typedef std::tuple<MaskedSADx4Func, MaskedSADx4Func> MaskedSADx4Param; + +class MaskedSADTestBase : public ::testing::Test { + public: + ~MaskedSADTestBase() override = default; + 
void SetUp() override = 0; + virtual void runRef(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr[], int ref_stride, + const uint8_t *second_pred, const uint8_t *msk, + int msk_stride, int inv_mask, unsigned sads[], + int times) = 0; + virtual void runTest(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr[], int ref_stride, + const uint8_t *second_pred, const uint8_t *msk, + int msk_stride, int inv_mask, unsigned sads[], + int times) = 0; + + void runMaskedSADTest(int run_times); +}; + +class MaskedSADTest : public MaskedSADTestBase, + public ::testing::WithParamInterface<MaskedSADParam> { + public: + ~MaskedSADTest() override = default; + void SetUp() override { + maskedSAD_op_ = GET_PARAM(0); + ref_maskedSAD_op_ = GET_PARAM(1); + } + + void runRef(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr[], + int ref_stride, const uint8_t *second_pred, const uint8_t *msk, + int msk_stride, int inv_mask, unsigned sads[], + int times) override; + void runTest(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr[], + int ref_stride, const uint8_t *second_pred, const uint8_t *msk, + int msk_stride, int inv_mask, unsigned sads[], + int times) override; + + protected: + MaskedSADFunc maskedSAD_op_; + MaskedSADFunc ref_maskedSAD_op_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MaskedSADTest); + +class MaskedSADx4Test : public MaskedSADTestBase, + public ::testing::WithParamInterface<MaskedSADx4Param> { + public: + ~MaskedSADx4Test() override = default; + void SetUp() override { + maskedSAD_op_ = GET_PARAM(0); + ref_maskedSAD_op_ = GET_PARAM(1); + } + void runRef(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr[], + int ref_stride, const uint8_t *second_pred, const uint8_t *msk, + int msk_stride, int inv_mask, unsigned sads[], + int times) override; + void runTest(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr[], + int ref_stride, const uint8_t *second_pred, const uint8_t *msk, + int 
msk_stride, int inv_mask, unsigned sads[], + int times) override; + + protected: + MaskedSADx4Func maskedSAD_op_; + MaskedSADx4Func ref_maskedSAD_op_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MaskedSADx4Test); + +void MaskedSADTest::runRef(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr[], int ref_stride, + const uint8_t *second_pred, const uint8_t *msk, + int msk_stride, int invert_mask, unsigned sads[], + int times) { + for (int repeat = 0; repeat < times; ++repeat) { + sads[0] = ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr[0], ref_stride, + second_pred, msk, msk_stride, invert_mask); + } +} + +void MaskedSADTest::runTest(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr[], int ref_stride, + const uint8_t *second_pred, const uint8_t *msk, + int msk_stride, int invert_mask, unsigned sads[], + int times) { + if (times == 1) { + sads[0] = maskedSAD_op_(src_ptr, src_stride, ref_ptr[0], ref_stride, + second_pred, msk, msk_stride, invert_mask); + } else { + for (int repeat = 0; repeat < times; ++repeat) { + API_REGISTER_STATE_CHECK( + sads[0] = maskedSAD_op_(src_ptr, src_stride, ref_ptr[0], ref_stride, + second_pred, msk, msk_stride, invert_mask)); + } + } +} + +void MaskedSADx4Test::runRef(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr[], int ref_stride, + const uint8_t *second_pred, const uint8_t *msk, + int msk_stride, int invert_mask, unsigned sads[], + int times) { + for (int repeat = 0; repeat < times; ++repeat) { + ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, + msk, msk_stride, invert_mask, sads); + } +} + +void MaskedSADx4Test::runTest(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr[], int ref_stride, + const uint8_t *second_pred, const uint8_t *msk, + int msk_stride, int invert_mask, unsigned sads[], + int times) { + if (times == 1) { + API_REGISTER_STATE_CHECK(maskedSAD_op_(src_ptr, src_stride, ref_ptr, + ref_stride, second_pred, msk, + msk_stride, 
invert_mask, sads)); + } else { + for (int repeat = 0; repeat < times; ++repeat) { + maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, msk, + msk_stride, invert_mask, sads); + } + } +} + +void MaskedSADTestBase::runMaskedSADTest(int run_times) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const unsigned kBlockSize = MAX_SB_SIZE * MAX_SB_SIZE; + DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE * 4]); + DECLARE_ALIGNED(16, uint8_t, second_pred_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + + const uint8_t *refs[] = { ref_ptr, ref_ptr + kBlockSize, + ref_ptr + 2 * kBlockSize, + ref_ptr + 3 * kBlockSize }; + unsigned sads[] = { 0, 0, 0, 0 }; + unsigned sads_ref[] = { 0, 0, 0, 0 }; + int err_count = 0; + int first_failure = -1; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; + const int iters = run_times == 1 ? number_of_iterations : 1; + for (int i = 0; i < iters; ++i) { + if (run_times == 1 && i == 0) { + // The maximum accumulator value occurs when src=0 and + // ref/second_pref=255 (or vice-versa, since we take the absolute + // difference). Check this case explicitly to ensure we do not overflow + // during accumulation. + for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) { + src_ptr[j] = 0; + ref_ptr[j] = 255; + (ref_ptr + kBlockSize)[j] = 255; + (ref_ptr + 2 * kBlockSize)[j] = 255; + (ref_ptr + 3 * kBlockSize)[j] = 255; + second_pred_ptr[j] = 255; + } + } else { + for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) { + src_ptr[j] = rnd.Rand8(); + ref_ptr[j] = rnd.Rand8(); + (ref_ptr + kBlockSize)[j] = rnd.Rand8(); + (ref_ptr + 2 * kBlockSize)[j] = rnd.Rand8(); + (ref_ptr + 3 * kBlockSize)[j] = rnd.Rand8(); + second_pred_ptr[j] = rnd.Rand8(); + } + } + for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) { + msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? 
rnd.Rand8() & 0x3f : 64; + assert(msk_ptr[j] <= 64); + } + + for (int invert_mask = 0; invert_mask < 2; ++invert_mask) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + runRef(src_ptr, src_stride, refs, ref_stride, second_pred_ptr, msk_ptr, + msk_stride, invert_mask, sads_ref, run_times); + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + aom_usec_timer_start(&timer); + runTest(src_ptr, src_stride, refs, ref_stride, second_pred_ptr, msk_ptr, + msk_stride, invert_mask, sads, run_times); + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + if (run_times > 10) { + printf("%7.2f/%7.2fns", time1, time2); + printf("(%3.2f)\n", time1 / time2); + } + if (sads_ref[0] != sads[0] || sads_ref[1] != sads[1] || + sads_ref[2] != sads[2] || sads_ref[3] != sads[3]) { + err_count++; + if (first_failure == -1) first_failure = i; + } + } + } + EXPECT_EQ(0, err_count) << "Error: Masked SAD Test, output doesn't match. 
" + << "First failed at test case " << first_failure; +} + +TEST_P(MaskedSADTest, OperationCheck) { runMaskedSADTest(1); } + +TEST_P(MaskedSADTest, DISABLED_Speed) { runMaskedSADTest(2000000); } + +TEST_P(MaskedSADx4Test, OperationCheck) { runMaskedSADTest(1); } + +TEST_P(MaskedSADx4Test, DISABLED_Speed) { runMaskedSADTest(2000000); } + +#if CONFIG_AV1_HIGHBITDEPTH +typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + const uint8_t *second_pred, + const uint8_t *msk, int msk_stride, + int invert_mask); +typedef std::tuple<HighbdMaskedSADFunc, HighbdMaskedSADFunc> + HighbdMaskedSADParam; + +class HighbdMaskedSADTest + : public ::testing::TestWithParam<HighbdMaskedSADParam> { + public: + ~HighbdMaskedSADTest() override = default; + void SetUp() override { + maskedSAD_op_ = GET_PARAM(0); + ref_maskedSAD_op_ = GET_PARAM(1); + } + + void runHighbdMaskedSADTest(int run_times); + + protected: + HighbdMaskedSADFunc maskedSAD_op_; + HighbdMaskedSADFunc ref_maskedSAD_op_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HighbdMaskedSADTest); + +void HighbdMaskedSADTest::runHighbdMaskedSADTest(int run_times) { + unsigned int ref_ret = 0, ret = 1; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint16_t, second_pred_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]); + uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); + uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); + uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr); + int err_count = 0; + int first_failure = -1; + int src_stride = MAX_SB_SIZE; + int ref_stride = MAX_SB_SIZE; + int msk_stride = MAX_SB_SIZE; + const int iters = run_times == 1 ? 
number_of_iterations : 1; + for (int i = 0; i < iters; ++i) { + for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) { + src_ptr[j] = rnd.Rand16() & 0xfff; + ref_ptr[j] = rnd.Rand16() & 0xfff; + second_pred_ptr[j] = rnd.Rand16() & 0xfff; + msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64; + } + + for (int invert_mask = 0; invert_mask < 2; ++invert_mask) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int repeat = 0; repeat < run_times; ++repeat) { + ref_ret = ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride, + second_pred8_ptr, msk_ptr, msk_stride, + invert_mask); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + if (run_times == 1) { + API_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride, + ref8_ptr, ref_stride, + second_pred8_ptr, msk_ptr, + msk_stride, invert_mask)); + } else { + for (int repeat = 0; repeat < run_times; ++repeat) { + ret = + maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride, + second_pred8_ptr, msk_ptr, msk_stride, invert_mask); + } + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + if (run_times > 10) { + printf("%7.2f/%7.2fns", time1, time2); + printf("(%3.2f)\n", time1 / time2); + } + if (ret != ref_ret) { + err_count++; + if (first_failure == -1) first_failure = i; + } + } + } + EXPECT_EQ(0, err_count) + << "Error: High BD Masked SAD Test, output doesn't match. 
" + << "First failed at test case " << first_failure; +} + +TEST_P(HighbdMaskedSADTest, OperationCheck) { runHighbdMaskedSADTest(1); } + +TEST_P(HighbdMaskedSADTest, DISABLED_Speed) { runHighbdMaskedSADTest(1000000); } +#endif // CONFIG_AV1_HIGHBITDEPTH + +using std::make_tuple; + +#if HAVE_SSSE3 +const MaskedSADParam msad_test[] = { + make_tuple(&aom_masked_sad4x4_ssse3, &aom_masked_sad4x4_c), + make_tuple(&aom_masked_sad4x8_ssse3, &aom_masked_sad4x8_c), + make_tuple(&aom_masked_sad8x4_ssse3, &aom_masked_sad8x4_c), + make_tuple(&aom_masked_sad8x8_ssse3, &aom_masked_sad8x8_c), + make_tuple(&aom_masked_sad8x16_ssse3, &aom_masked_sad8x16_c), + make_tuple(&aom_masked_sad16x8_ssse3, &aom_masked_sad16x8_c), + make_tuple(&aom_masked_sad16x16_ssse3, &aom_masked_sad16x16_c), + make_tuple(&aom_masked_sad16x32_ssse3, &aom_masked_sad16x32_c), + make_tuple(&aom_masked_sad32x16_ssse3, &aom_masked_sad32x16_c), + make_tuple(&aom_masked_sad32x32_ssse3, &aom_masked_sad32x32_c), + make_tuple(&aom_masked_sad32x64_ssse3, &aom_masked_sad32x64_c), + make_tuple(&aom_masked_sad64x32_ssse3, &aom_masked_sad64x32_c), + make_tuple(&aom_masked_sad64x64_ssse3, &aom_masked_sad64x64_c), + make_tuple(&aom_masked_sad64x128_ssse3, &aom_masked_sad64x128_c), + make_tuple(&aom_masked_sad128x64_ssse3, &aom_masked_sad128x64_c), + make_tuple(&aom_masked_sad128x128_ssse3, &aom_masked_sad128x128_c), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_masked_sad4x16_ssse3, &aom_masked_sad4x16_c), + make_tuple(&aom_masked_sad16x4_ssse3, &aom_masked_sad16x4_c), + make_tuple(&aom_masked_sad8x32_ssse3, &aom_masked_sad8x32_c), + make_tuple(&aom_masked_sad32x8_ssse3, &aom_masked_sad32x8_c), + make_tuple(&aom_masked_sad16x64_ssse3, &aom_masked_sad16x64_c), + make_tuple(&aom_masked_sad64x16_ssse3, &aom_masked_sad64x16_c), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(SSSE3, MaskedSADTest, ::testing::ValuesIn(msad_test)); + +const MaskedSADx4Param msadx4_test[] = { + make_tuple(&aom_masked_sad4x4x4d_ssse3, 
&aom_masked_sad4x4x4d_c), + make_tuple(&aom_masked_sad4x8x4d_ssse3, &aom_masked_sad4x8x4d_c), + make_tuple(&aom_masked_sad8x4x4d_ssse3, &aom_masked_sad8x4x4d_c), + make_tuple(&aom_masked_sad8x8x4d_ssse3, &aom_masked_sad8x8x4d_c), + make_tuple(&aom_masked_sad8x16x4d_ssse3, &aom_masked_sad8x16x4d_c), + make_tuple(&aom_masked_sad16x8x4d_ssse3, &aom_masked_sad16x8x4d_c), + make_tuple(&aom_masked_sad16x16x4d_ssse3, &aom_masked_sad16x16x4d_c), + make_tuple(&aom_masked_sad16x32x4d_ssse3, &aom_masked_sad16x32x4d_c), + make_tuple(&aom_masked_sad32x16x4d_ssse3, &aom_masked_sad32x16x4d_c), + make_tuple(&aom_masked_sad32x32x4d_ssse3, &aom_masked_sad32x32x4d_c), + make_tuple(&aom_masked_sad32x64x4d_ssse3, &aom_masked_sad32x64x4d_c), + make_tuple(&aom_masked_sad64x32x4d_ssse3, &aom_masked_sad64x32x4d_c), + make_tuple(&aom_masked_sad64x64x4d_ssse3, &aom_masked_sad64x64x4d_c), + make_tuple(&aom_masked_sad64x128x4d_ssse3, &aom_masked_sad64x128x4d_c), + make_tuple(&aom_masked_sad128x64x4d_ssse3, &aom_masked_sad128x64x4d_c), + make_tuple(&aom_masked_sad128x128x4d_ssse3, &aom_masked_sad128x128x4d_c), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_masked_sad4x16x4d_ssse3, &aom_masked_sad4x16x4d_c), + make_tuple(&aom_masked_sad16x4x4d_ssse3, &aom_masked_sad16x4x4d_c), + make_tuple(&aom_masked_sad8x32x4d_ssse3, &aom_masked_sad8x32x4d_c), + make_tuple(&aom_masked_sad32x8x4d_ssse3, &aom_masked_sad32x8x4d_c), + make_tuple(&aom_masked_sad16x64x4d_ssse3, &aom_masked_sad16x64x4d_c), + make_tuple(&aom_masked_sad64x16x4d_ssse3, &aom_masked_sad64x16x4d_c), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(SSSE3, MaskedSADx4Test, + ::testing::ValuesIn(msadx4_test)); + +#if CONFIG_AV1_HIGHBITDEPTH +const HighbdMaskedSADParam hbd_msad_test[] = { + make_tuple(&aom_highbd_masked_sad4x4_ssse3, &aom_highbd_masked_sad4x4_c), + make_tuple(&aom_highbd_masked_sad4x8_ssse3, &aom_highbd_masked_sad4x8_c), + make_tuple(&aom_highbd_masked_sad8x4_ssse3, &aom_highbd_masked_sad8x4_c), + 
make_tuple(&aom_highbd_masked_sad8x8_ssse3, &aom_highbd_masked_sad8x8_c), + make_tuple(&aom_highbd_masked_sad8x16_ssse3, &aom_highbd_masked_sad8x16_c), + make_tuple(&aom_highbd_masked_sad16x8_ssse3, &aom_highbd_masked_sad16x8_c), + make_tuple(&aom_highbd_masked_sad16x16_ssse3, &aom_highbd_masked_sad16x16_c), + make_tuple(&aom_highbd_masked_sad16x32_ssse3, &aom_highbd_masked_sad16x32_c), + make_tuple(&aom_highbd_masked_sad32x16_ssse3, &aom_highbd_masked_sad32x16_c), + make_tuple(&aom_highbd_masked_sad32x32_ssse3, &aom_highbd_masked_sad32x32_c), + make_tuple(&aom_highbd_masked_sad32x64_ssse3, &aom_highbd_masked_sad32x64_c), + make_tuple(&aom_highbd_masked_sad64x32_ssse3, &aom_highbd_masked_sad64x32_c), + make_tuple(&aom_highbd_masked_sad64x64_ssse3, &aom_highbd_masked_sad64x64_c), + make_tuple(&aom_highbd_masked_sad64x128_ssse3, + &aom_highbd_masked_sad64x128_c), + make_tuple(&aom_highbd_masked_sad128x64_ssse3, + &aom_highbd_masked_sad128x64_c), + make_tuple(&aom_highbd_masked_sad128x128_ssse3, + &aom_highbd_masked_sad128x128_c), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_highbd_masked_sad4x16_ssse3, &aom_highbd_masked_sad4x16_c), + make_tuple(&aom_highbd_masked_sad16x4_ssse3, &aom_highbd_masked_sad16x4_c), + make_tuple(&aom_highbd_masked_sad8x32_ssse3, &aom_highbd_masked_sad8x32_c), + make_tuple(&aom_highbd_masked_sad32x8_ssse3, &aom_highbd_masked_sad32x8_c), + make_tuple(&aom_highbd_masked_sad16x64_ssse3, &aom_highbd_masked_sad16x64_c), + make_tuple(&aom_highbd_masked_sad64x16_ssse3, &aom_highbd_masked_sad64x16_c), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(SSSE3, HighbdMaskedSADTest, + ::testing::ValuesIn(hbd_msad_test)); +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_SSSE3 + +#if HAVE_AVX2 +const MaskedSADParam msad_avx2_test[] = { + make_tuple(&aom_masked_sad4x4_avx2, &aom_masked_sad4x4_ssse3), + make_tuple(&aom_masked_sad4x8_avx2, &aom_masked_sad4x8_ssse3), + make_tuple(&aom_masked_sad8x4_avx2, &aom_masked_sad8x4_ssse3), + make_tuple(&aom_masked_sad8x8_avx2, 
&aom_masked_sad8x8_ssse3), + make_tuple(&aom_masked_sad8x16_avx2, &aom_masked_sad8x16_ssse3), + make_tuple(&aom_masked_sad16x8_avx2, &aom_masked_sad16x8_ssse3), + make_tuple(&aom_masked_sad16x16_avx2, &aom_masked_sad16x16_ssse3), + make_tuple(&aom_masked_sad16x32_avx2, &aom_masked_sad16x32_ssse3), + make_tuple(&aom_masked_sad32x16_avx2, &aom_masked_sad32x16_ssse3), + make_tuple(&aom_masked_sad32x32_avx2, &aom_masked_sad32x32_ssse3), + make_tuple(&aom_masked_sad32x64_avx2, &aom_masked_sad32x64_ssse3), + make_tuple(&aom_masked_sad64x32_avx2, &aom_masked_sad64x32_ssse3), + make_tuple(&aom_masked_sad64x64_avx2, &aom_masked_sad64x64_ssse3), + make_tuple(&aom_masked_sad64x128_avx2, &aom_masked_sad64x128_ssse3), + make_tuple(&aom_masked_sad128x64_avx2, &aom_masked_sad128x64_ssse3), + make_tuple(&aom_masked_sad128x128_avx2, &aom_masked_sad128x128_ssse3), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_masked_sad4x16_avx2, &aom_masked_sad4x16_ssse3), + make_tuple(&aom_masked_sad16x4_avx2, &aom_masked_sad16x4_ssse3), + make_tuple(&aom_masked_sad8x32_avx2, &aom_masked_sad8x32_ssse3), + make_tuple(&aom_masked_sad32x8_avx2, &aom_masked_sad32x8_ssse3), + make_tuple(&aom_masked_sad16x64_avx2, &aom_masked_sad16x64_ssse3), + make_tuple(&aom_masked_sad64x16_avx2, &aom_masked_sad64x16_ssse3) +#endif +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, MaskedSADTest, + ::testing::ValuesIn(msad_avx2_test)); + +#if CONFIG_AV1_HIGHBITDEPTH +const HighbdMaskedSADParam hbd_msad_avx2_test[] = { + make_tuple(&aom_highbd_masked_sad4x4_avx2, &aom_highbd_masked_sad4x4_ssse3), + make_tuple(&aom_highbd_masked_sad4x8_avx2, &aom_highbd_masked_sad4x8_ssse3), + make_tuple(&aom_highbd_masked_sad8x4_avx2, &aom_highbd_masked_sad8x4_ssse3), + make_tuple(&aom_highbd_masked_sad8x8_avx2, &aom_highbd_masked_sad8x8_ssse3), + make_tuple(&aom_highbd_masked_sad8x16_avx2, &aom_highbd_masked_sad8x16_ssse3), + make_tuple(&aom_highbd_masked_sad16x8_avx2, &aom_highbd_masked_sad16x8_ssse3), + 
make_tuple(&aom_highbd_masked_sad16x16_avx2, + &aom_highbd_masked_sad16x16_ssse3), + make_tuple(&aom_highbd_masked_sad16x32_avx2, + &aom_highbd_masked_sad16x32_ssse3), + make_tuple(&aom_highbd_masked_sad32x16_avx2, + &aom_highbd_masked_sad32x16_ssse3), + make_tuple(&aom_highbd_masked_sad32x32_avx2, + &aom_highbd_masked_sad32x32_ssse3), + make_tuple(&aom_highbd_masked_sad32x64_avx2, + &aom_highbd_masked_sad32x64_ssse3), + make_tuple(&aom_highbd_masked_sad64x32_avx2, + &aom_highbd_masked_sad64x32_ssse3), + make_tuple(&aom_highbd_masked_sad64x64_avx2, + &aom_highbd_masked_sad64x64_ssse3), + make_tuple(&aom_highbd_masked_sad64x128_avx2, + &aom_highbd_masked_sad64x128_ssse3), + make_tuple(&aom_highbd_masked_sad128x64_avx2, + &aom_highbd_masked_sad128x64_ssse3), + make_tuple(&aom_highbd_masked_sad128x128_avx2, + &aom_highbd_masked_sad128x128_ssse3), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_highbd_masked_sad4x16_avx2, &aom_highbd_masked_sad4x16_ssse3), + make_tuple(&aom_highbd_masked_sad16x4_avx2, &aom_highbd_masked_sad16x4_ssse3), + make_tuple(&aom_highbd_masked_sad8x32_avx2, &aom_highbd_masked_sad8x32_ssse3), + make_tuple(&aom_highbd_masked_sad32x8_avx2, &aom_highbd_masked_sad32x8_ssse3), + make_tuple(&aom_highbd_masked_sad16x64_avx2, + &aom_highbd_masked_sad16x64_ssse3), + make_tuple(&aom_highbd_masked_sad64x16_avx2, + &aom_highbd_masked_sad64x16_ssse3) +#endif +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, HighbdMaskedSADTest, + ::testing::ValuesIn(hbd_msad_avx2_test)); +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_AVX2 + +#if HAVE_NEON +const MaskedSADParam msad_test[] = { + make_tuple(&aom_masked_sad4x4_neon, &aom_masked_sad4x4_c), + make_tuple(&aom_masked_sad4x8_neon, &aom_masked_sad4x8_c), + make_tuple(&aom_masked_sad8x4_neon, &aom_masked_sad8x4_c), + make_tuple(&aom_masked_sad8x8_neon, &aom_masked_sad8x8_c), + make_tuple(&aom_masked_sad8x16_neon, &aom_masked_sad8x16_c), + make_tuple(&aom_masked_sad16x8_neon, &aom_masked_sad16x8_c), + 
make_tuple(&aom_masked_sad16x16_neon, &aom_masked_sad16x16_c), + make_tuple(&aom_masked_sad16x32_neon, &aom_masked_sad16x32_c), + make_tuple(&aom_masked_sad32x16_neon, &aom_masked_sad32x16_c), + make_tuple(&aom_masked_sad32x32_neon, &aom_masked_sad32x32_c), + make_tuple(&aom_masked_sad32x64_neon, &aom_masked_sad32x64_c), + make_tuple(&aom_masked_sad64x32_neon, &aom_masked_sad64x32_c), + make_tuple(&aom_masked_sad64x64_neon, &aom_masked_sad64x64_c), + make_tuple(&aom_masked_sad64x128_neon, &aom_masked_sad64x128_c), + make_tuple(&aom_masked_sad128x64_neon, &aom_masked_sad128x64_c), + make_tuple(&aom_masked_sad128x128_neon, &aom_masked_sad128x128_c), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_masked_sad4x16_neon, &aom_masked_sad4x16_c), + make_tuple(&aom_masked_sad16x4_neon, &aom_masked_sad16x4_c), + make_tuple(&aom_masked_sad8x32_neon, &aom_masked_sad8x32_c), + make_tuple(&aom_masked_sad32x8_neon, &aom_masked_sad32x8_c), + make_tuple(&aom_masked_sad16x64_neon, &aom_masked_sad16x64_c), + make_tuple(&aom_masked_sad64x16_neon, &aom_masked_sad64x16_c), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(NEON, MaskedSADTest, ::testing::ValuesIn(msad_test)); + +const MaskedSADx4Param msadx4_test[] = { + make_tuple(&aom_masked_sad4x4x4d_neon, &aom_masked_sad4x4x4d_c), + make_tuple(&aom_masked_sad4x8x4d_neon, &aom_masked_sad4x8x4d_c), + make_tuple(&aom_masked_sad8x4x4d_neon, &aom_masked_sad8x4x4d_c), + make_tuple(&aom_masked_sad8x8x4d_neon, &aom_masked_sad8x8x4d_c), + make_tuple(&aom_masked_sad8x16x4d_neon, &aom_masked_sad8x16x4d_c), + make_tuple(&aom_masked_sad16x8x4d_neon, &aom_masked_sad16x8x4d_c), + make_tuple(&aom_masked_sad16x16x4d_neon, &aom_masked_sad16x16x4d_c), + make_tuple(&aom_masked_sad16x32x4d_neon, &aom_masked_sad16x32x4d_c), + make_tuple(&aom_masked_sad32x16x4d_neon, &aom_masked_sad32x16x4d_c), + make_tuple(&aom_masked_sad32x32x4d_neon, &aom_masked_sad32x32x4d_c), + make_tuple(&aom_masked_sad32x64x4d_neon, &aom_masked_sad32x64x4d_c), + 
make_tuple(&aom_masked_sad64x32x4d_neon, &aom_masked_sad64x32x4d_c), + make_tuple(&aom_masked_sad64x64x4d_neon, &aom_masked_sad64x64x4d_c), + make_tuple(&aom_masked_sad64x128x4d_neon, &aom_masked_sad64x128x4d_c), + make_tuple(&aom_masked_sad128x64x4d_neon, &aom_masked_sad128x64x4d_c), + make_tuple(&aom_masked_sad128x128x4d_neon, &aom_masked_sad128x128x4d_c), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_masked_sad4x16x4d_neon, &aom_masked_sad4x16x4d_c), + make_tuple(&aom_masked_sad16x4x4d_neon, &aom_masked_sad16x4x4d_c), + make_tuple(&aom_masked_sad8x32x4d_neon, &aom_masked_sad8x32x4d_c), + make_tuple(&aom_masked_sad32x8x4d_neon, &aom_masked_sad32x8x4d_c), + make_tuple(&aom_masked_sad16x64x4d_neon, &aom_masked_sad16x64x4d_c), + make_tuple(&aom_masked_sad64x16x4d_neon, &aom_masked_sad64x16x4d_c), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(NEON, MaskedSADx4Test, + ::testing::ValuesIn(msadx4_test)); + +#if CONFIG_AV1_HIGHBITDEPTH +// High bit-depth NEON kernels, each paired with its C reference. Declared with +// the Highbd parameter type, matching hbd_msad_avx2_test and the +// HighbdMaskedSADTest suite that consumes this table. +const HighbdMaskedSADParam hbd_msad_neon_test[] = { + make_tuple(&aom_highbd_masked_sad4x4_neon, &aom_highbd_masked_sad4x4_c), + make_tuple(&aom_highbd_masked_sad4x8_neon, &aom_highbd_masked_sad4x8_c), + make_tuple(&aom_highbd_masked_sad8x4_neon, &aom_highbd_masked_sad8x4_c), + make_tuple(&aom_highbd_masked_sad8x8_neon, &aom_highbd_masked_sad8x8_c), + make_tuple(&aom_highbd_masked_sad8x16_neon, &aom_highbd_masked_sad8x16_c), + make_tuple(&aom_highbd_masked_sad16x8_neon, &aom_highbd_masked_sad16x8_c), + make_tuple(&aom_highbd_masked_sad16x16_neon, &aom_highbd_masked_sad16x16_c), + make_tuple(&aom_highbd_masked_sad16x32_neon, &aom_highbd_masked_sad16x32_c), + make_tuple(&aom_highbd_masked_sad32x16_neon, &aom_highbd_masked_sad32x16_c), + make_tuple(&aom_highbd_masked_sad32x32_neon, &aom_highbd_masked_sad32x32_c), + make_tuple(&aom_highbd_masked_sad32x64_neon, &aom_highbd_masked_sad32x64_c), + make_tuple(&aom_highbd_masked_sad64x32_neon, &aom_highbd_masked_sad64x32_c), + make_tuple(&aom_highbd_masked_sad64x64_neon, &aom_highbd_masked_sad64x64_c), + 
make_tuple(&aom_highbd_masked_sad64x128_neon, &aom_highbd_masked_sad64x128_c), + make_tuple(&aom_highbd_masked_sad128x64_neon, &aom_highbd_masked_sad128x64_c), + make_tuple(&aom_highbd_masked_sad128x128_neon, + &aom_highbd_masked_sad128x128_c), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_highbd_masked_sad4x16_neon, &aom_highbd_masked_sad4x16_c), + make_tuple(&aom_highbd_masked_sad16x4_neon, &aom_highbd_masked_sad16x4_c), + make_tuple(&aom_highbd_masked_sad8x32_neon, &aom_highbd_masked_sad8x32_c), + make_tuple(&aom_highbd_masked_sad32x8_neon, &aom_highbd_masked_sad32x8_c), + make_tuple(&aom_highbd_masked_sad16x64_neon, &aom_highbd_masked_sad16x64_c), + make_tuple(&aom_highbd_masked_sad64x16_neon, &aom_highbd_masked_sad64x16_c), +#endif // !CONFIG_REALTIME_ONLY +}; + +INSTANTIATE_TEST_SUITE_P(NEON, HighbdMaskedSADTest, + ::testing::ValuesIn(hbd_msad_neon_test)); + +#endif // CONFIG_AV1_HIGHBITDEPTH + +#endif // HAVE_NEON + +} // namespace diff --git a/third_party/aom/test/masked_variance_test.cc b/third_party/aom/test/masked_variance_test.cc new file mode 100644 index 0000000000..8482a12f53 --- /dev/null +++ b/third_party/aom/test/masked_variance_test.cc @@ -0,0 +1,712 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_dsp/aom_filter.h" +#include "aom_mem/aom_mem.h" + +using libaom_test::ACMRandom; + +namespace { +const int number_of_iterations = 200; + +typedef unsigned int (*MaskedSubPixelVarianceFunc)( + const uint8_t *src, int src_stride, int xoffset, int yoffset, + const uint8_t *ref, int ref_stride, const uint8_t *second_pred, + const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +typedef std::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc> + MaskedSubPixelVarianceParam; + +class MaskedSubPixelVarianceTest + : public ::testing::TestWithParam<MaskedSubPixelVarianceParam> { + public: + ~MaskedSubPixelVarianceTest() override = default; + void SetUp() override { + opt_func_ = GET_PARAM(0); + ref_func_ = GET_PARAM(1); + } + + protected: + MaskedSubPixelVarianceFunc opt_func_; + MaskedSubPixelVarianceFunc ref_func_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MaskedSubPixelVarianceTest); + +TEST_P(MaskedSubPixelVarianceTest, OperationCheck) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + // Note: We pad out the input array to a multiple of 16 bytes wide, so that + // consecutive rows keep the 16-byte alignment. 
+ DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]); + DECLARE_ALIGNED(16, uint8_t, + second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]); + int err_count = 0; + int first_failure = -1; + int src_stride = (MAX_SB_SIZE + 16); + int ref_stride = (MAX_SB_SIZE + 16); + int msk_stride = (MAX_SB_SIZE + 16); + int xoffset; + int yoffset; + + for (int i = 0; i < number_of_iterations; ++i) { + int xoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) }; + int yoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) }; + for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16); j++) { + src_ptr[j] = rnd.Rand8(); + ref_ptr[j] = rnd.Rand8(); + second_pred_ptr[j] = rnd.Rand8(); + msk_ptr[j] = rnd(65); + } + for (int k = 0; k < 3; k++) { + for (int l = 0; l < 3; l++) { + xoffset = xoffsets[k]; + yoffset = yoffsets[l]; + for (int invert_mask = 0; invert_mask < 2; ++invert_mask) { + ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr, + ref_stride, second_pred_ptr, msk_ptr, msk_stride, + invert_mask, &ref_sse); + API_REGISTER_STATE_CHECK( + opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset, + ref_ptr, ref_stride, second_pred_ptr, msk_ptr, + msk_stride, invert_mask, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) first_failure = i; + } + } + } + } + } + + EXPECT_EQ(0, err_count) + << "Error: Masked Sub Pixel Variance Test OperationCheck," + << "C output doesn't match SSSE3 output. 
" + << "First failed at test case " << first_failure; +} + +TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]); + DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]); + DECLARE_ALIGNED(16, uint8_t, + second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]); + int first_failure_x = -1; + int first_failure_y = -1; + int err_count = 0; + int first_failure = -1; + int src_stride = (MAX_SB_SIZE + 16); + int ref_stride = (MAX_SB_SIZE + 16); + int msk_stride = (MAX_SB_SIZE + 16); + + for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) { + for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) { + for (int i = 0; i < 16; ++i) { + memset(src_ptr, (i & 0x1) ? 255 : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)); + memset(ref_ptr, (i & 0x2) ? 255 : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)); + memset(second_pred_ptr, (i & 0x4) ? 255 : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)); + memset(msk_ptr, (i & 0x8) ? 64 : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)); + + for (int invert_mask = 0; invert_mask < 2; ++invert_mask) { + ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr, + ref_stride, second_pred_ptr, msk_ptr, msk_stride, + invert_mask, &ref_sse); + API_REGISTER_STATE_CHECK( + opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset, + ref_ptr, ref_stride, second_pred_ptr, msk_ptr, + msk_stride, invert_mask, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) { + first_failure = i; + first_failure_x = xoffset; + first_failure_y = yoffset; + } + } + } + } + } + } + + EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues," + << "C output doesn't match SSSE3 output. 
" + << "First failed at test case " << first_failure + << " x_offset = " << first_failure_x + << " y_offset = " << first_failure_y; +} + +#if CONFIG_AV1_HIGHBITDEPTH +typedef std::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc, + aom_bit_depth_t> + HighbdMaskedSubPixelVarianceParam; + +class HighbdMaskedSubPixelVarianceTest + : public ::testing::TestWithParam<HighbdMaskedSubPixelVarianceParam> { + public: + ~HighbdMaskedSubPixelVarianceTest() override = default; + void SetUp() override { + opt_func_ = GET_PARAM(0); + ref_func_ = GET_PARAM(1); + bit_depth_ = GET_PARAM(2); + } + + protected: + MaskedSubPixelVarianceFunc opt_func_; + MaskedSubPixelVarianceFunc ref_func_; + aom_bit_depth_t bit_depth_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HighbdMaskedSubPixelVarianceTest); + +TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]); + DECLARE_ALIGNED(16, uint16_t, + second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]); + uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); + uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); + uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr); + int err_count = 0; + int first_failure = -1; + int first_failure_x = -1; + int first_failure_y = -1; + int src_stride = (MAX_SB_SIZE + 8); + int ref_stride = (MAX_SB_SIZE + 8); + int msk_stride = (MAX_SB_SIZE + 8); + int xoffset, yoffset; + + for (int i = 0; i < number_of_iterations; ++i) { + for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8); j++) { + src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); + ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); + second_pred_ptr[j] = rnd.Rand16() & ((1 
<< bit_depth_) - 1); + msk_ptr[j] = rnd(65); + } + for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) { + for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) { + for (int invert_mask = 0; invert_mask < 2; ++invert_mask) { + ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr, + ref_stride, second_pred8_ptr, msk_ptr, msk_stride, + invert_mask, &ref_sse); + API_REGISTER_STATE_CHECK( + opt_ret = opt_func_(src8_ptr, src_stride, xoffset, yoffset, + ref8_ptr, ref_stride, second_pred8_ptr, + msk_ptr, msk_stride, invert_mask, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) { + first_failure = i; + first_failure_x = xoffset; + first_failure_y = yoffset; + } + } + } + } + } + } + + EXPECT_EQ(0, err_count) + << "Error: Masked Sub Pixel Variance Test OperationCheck," + << "C output doesn't match SSSE3 output. " + << "First failed at test case " << first_failure + << " x_offset = " << first_failure_x << " y_offset = " << first_failure_y; +} + +TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) { + unsigned int ref_ret, opt_ret; + unsigned int ref_sse, opt_sse; + ACMRandom rnd(ACMRandom::DeterministicSeed()); + DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]); + DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]); + DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]); + DECLARE_ALIGNED(16, uint16_t, + second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]); + uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); + uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); + uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr); + int first_failure_x = -1; + int first_failure_y = -1; + int err_count = 0; + int first_failure = -1; + int src_stride = (MAX_SB_SIZE + 8); + int ref_stride = (MAX_SB_SIZE + 8); + int msk_stride = (MAX_SB_SIZE + 8); + + for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) { + for 
(int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) { + for (int i = 0; i < 16; ++i) { + aom_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)); + aom_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)); + aom_memset16(second_pred_ptr, (i & 0x4) ? ((1 << bit_depth_) - 1) : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)); + memset(msk_ptr, (i & 0x8) ? 64 : 0, + (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)); + + for (int invert_mask = 0; invert_mask < 2; ++invert_mask) { + ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr, + ref_stride, second_pred8_ptr, msk_ptr, msk_stride, + invert_mask, &ref_sse); + API_REGISTER_STATE_CHECK( + opt_ret = opt_func_(src8_ptr, src_stride, xoffset, yoffset, + ref8_ptr, ref_stride, second_pred8_ptr, + msk_ptr, msk_stride, invert_mask, &opt_sse)); + + if (opt_ret != ref_ret || opt_sse != ref_sse) { + err_count++; + if (first_failure == -1) { + first_failure = i; + first_failure_x = xoffset; + first_failure_y = yoffset; + } + } + } + } + } + } + + EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues," + << "C output doesn't match SSSE3 output. 
" + << "First failed at test case " << first_failure + << " x_offset = " << first_failure_x + << " y_offset = " << first_failure_y; +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +using std::make_tuple; + +#if HAVE_SSSE3 + +const MaskedSubPixelVarianceParam sub_pel_var_test[] = { + make_tuple(&aom_masked_sub_pixel_variance128x128_ssse3, + &aom_masked_sub_pixel_variance128x128_c), + make_tuple(&aom_masked_sub_pixel_variance128x64_ssse3, + &aom_masked_sub_pixel_variance128x64_c), + make_tuple(&aom_masked_sub_pixel_variance64x128_ssse3, + &aom_masked_sub_pixel_variance64x128_c), + make_tuple(&aom_masked_sub_pixel_variance64x64_ssse3, + &aom_masked_sub_pixel_variance64x64_c), + make_tuple(&aom_masked_sub_pixel_variance64x32_ssse3, + &aom_masked_sub_pixel_variance64x32_c), + make_tuple(&aom_masked_sub_pixel_variance32x64_ssse3, + &aom_masked_sub_pixel_variance32x64_c), + make_tuple(&aom_masked_sub_pixel_variance32x32_ssse3, + &aom_masked_sub_pixel_variance32x32_c), + make_tuple(&aom_masked_sub_pixel_variance32x16_ssse3, + &aom_masked_sub_pixel_variance32x16_c), + make_tuple(&aom_masked_sub_pixel_variance16x32_ssse3, + &aom_masked_sub_pixel_variance16x32_c), + make_tuple(&aom_masked_sub_pixel_variance16x16_ssse3, + &aom_masked_sub_pixel_variance16x16_c), + make_tuple(&aom_masked_sub_pixel_variance16x8_ssse3, + &aom_masked_sub_pixel_variance16x8_c), + make_tuple(&aom_masked_sub_pixel_variance8x16_ssse3, + &aom_masked_sub_pixel_variance8x16_c), + make_tuple(&aom_masked_sub_pixel_variance8x8_ssse3, + &aom_masked_sub_pixel_variance8x8_c), + make_tuple(&aom_masked_sub_pixel_variance8x4_ssse3, + &aom_masked_sub_pixel_variance8x4_c), + make_tuple(&aom_masked_sub_pixel_variance4x8_ssse3, + &aom_masked_sub_pixel_variance4x8_c), + make_tuple(&aom_masked_sub_pixel_variance4x4_ssse3, + &aom_masked_sub_pixel_variance4x4_c), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_masked_sub_pixel_variance64x16_ssse3, + &aom_masked_sub_pixel_variance64x16_c), + 
make_tuple(&aom_masked_sub_pixel_variance16x64_ssse3, + &aom_masked_sub_pixel_variance16x64_c), + make_tuple(&aom_masked_sub_pixel_variance32x8_ssse3, + &aom_masked_sub_pixel_variance32x8_c), + make_tuple(&aom_masked_sub_pixel_variance8x32_ssse3, + &aom_masked_sub_pixel_variance8x32_c), + make_tuple(&aom_masked_sub_pixel_variance16x4_ssse3, + &aom_masked_sub_pixel_variance16x4_c), + make_tuple(&aom_masked_sub_pixel_variance4x16_ssse3, + &aom_masked_sub_pixel_variance4x16_c), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(SSSE3_C_COMPARE, MaskedSubPixelVarianceTest, + ::testing::ValuesIn(sub_pel_var_test)); + +#if CONFIG_AV1_HIGHBITDEPTH +const HighbdMaskedSubPixelVarianceParam hbd_sub_pel_var_test[] = { + make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x128_ssse3, + &aom_highbd_8_masked_sub_pixel_variance128x128_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x64_ssse3, + &aom_highbd_8_masked_sub_pixel_variance128x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x128_ssse3, + &aom_highbd_8_masked_sub_pixel_variance64x128_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x64_ssse3, + &aom_highbd_8_masked_sub_pixel_variance64x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x32_ssse3, + &aom_highbd_8_masked_sub_pixel_variance64x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x64_ssse3, + &aom_highbd_8_masked_sub_pixel_variance32x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x32_ssse3, + &aom_highbd_8_masked_sub_pixel_variance32x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x16_ssse3, + &aom_highbd_8_masked_sub_pixel_variance32x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x32_ssse3, + &aom_highbd_8_masked_sub_pixel_variance16x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x16_ssse3, + &aom_highbd_8_masked_sub_pixel_variance16x16_c, AOM_BITS_8), + 
make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x8_ssse3, + &aom_highbd_8_masked_sub_pixel_variance16x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x16_ssse3, + &aom_highbd_8_masked_sub_pixel_variance8x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x8_ssse3, + &aom_highbd_8_masked_sub_pixel_variance8x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x4_ssse3, + &aom_highbd_8_masked_sub_pixel_variance8x4_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x8_ssse3, + &aom_highbd_8_masked_sub_pixel_variance4x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x4_ssse3, + &aom_highbd_8_masked_sub_pixel_variance4x4_c, AOM_BITS_8), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x128_ssse3, + &aom_highbd_10_masked_sub_pixel_variance128x128_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x64_ssse3, + &aom_highbd_10_masked_sub_pixel_variance128x64_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x128_ssse3, + &aom_highbd_10_masked_sub_pixel_variance64x128_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x64_ssse3, + &aom_highbd_10_masked_sub_pixel_variance64x64_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x32_ssse3, + &aom_highbd_10_masked_sub_pixel_variance64x32_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x64_ssse3, + &aom_highbd_10_masked_sub_pixel_variance32x64_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x32_ssse3, + &aom_highbd_10_masked_sub_pixel_variance32x32_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x16_ssse3, + &aom_highbd_10_masked_sub_pixel_variance32x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x32_ssse3, + &aom_highbd_10_masked_sub_pixel_variance16x32_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x16_ssse3, 
+ &aom_highbd_10_masked_sub_pixel_variance16x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x8_ssse3, + &aom_highbd_10_masked_sub_pixel_variance16x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x16_ssse3, + &aom_highbd_10_masked_sub_pixel_variance8x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x8_ssse3, + &aom_highbd_10_masked_sub_pixel_variance8x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x4_ssse3, + &aom_highbd_10_masked_sub_pixel_variance8x4_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x8_ssse3, + &aom_highbd_10_masked_sub_pixel_variance4x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x4_ssse3, + &aom_highbd_10_masked_sub_pixel_variance4x4_c, AOM_BITS_10), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x128_ssse3, + &aom_highbd_12_masked_sub_pixel_variance128x128_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x64_ssse3, + &aom_highbd_12_masked_sub_pixel_variance128x64_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x128_ssse3, + &aom_highbd_12_masked_sub_pixel_variance64x128_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x64_ssse3, + &aom_highbd_12_masked_sub_pixel_variance64x64_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x32_ssse3, + &aom_highbd_12_masked_sub_pixel_variance64x32_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x64_ssse3, + &aom_highbd_12_masked_sub_pixel_variance32x64_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x32_ssse3, + &aom_highbd_12_masked_sub_pixel_variance32x32_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x16_ssse3, + &aom_highbd_12_masked_sub_pixel_variance32x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x32_ssse3, + 
&aom_highbd_12_masked_sub_pixel_variance16x32_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x16_ssse3, + &aom_highbd_12_masked_sub_pixel_variance16x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x8_ssse3, + &aom_highbd_12_masked_sub_pixel_variance16x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x16_ssse3, + &aom_highbd_12_masked_sub_pixel_variance8x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x8_ssse3, + &aom_highbd_12_masked_sub_pixel_variance8x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x4_ssse3, + &aom_highbd_12_masked_sub_pixel_variance8x4_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x8_ssse3, + &aom_highbd_12_masked_sub_pixel_variance4x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x4_ssse3, + &aom_highbd_12_masked_sub_pixel_variance4x4_c, AOM_BITS_12), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x16_ssse3, + &aom_highbd_8_masked_sub_pixel_variance64x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x64_ssse3, + &aom_highbd_8_masked_sub_pixel_variance16x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x8_ssse3, + &aom_highbd_8_masked_sub_pixel_variance32x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x32_ssse3, + &aom_highbd_8_masked_sub_pixel_variance8x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x4_ssse3, + &aom_highbd_8_masked_sub_pixel_variance16x4_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x16_ssse3, + &aom_highbd_8_masked_sub_pixel_variance4x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x16_ssse3, + &aom_highbd_10_masked_sub_pixel_variance64x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x64_ssse3, + &aom_highbd_10_masked_sub_pixel_variance16x64_c, 
AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x8_ssse3, + &aom_highbd_10_masked_sub_pixel_variance32x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x32_ssse3, + &aom_highbd_10_masked_sub_pixel_variance8x32_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x4_ssse3, + &aom_highbd_10_masked_sub_pixel_variance16x4_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x16_ssse3, + &aom_highbd_10_masked_sub_pixel_variance4x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x16_ssse3, + &aom_highbd_12_masked_sub_pixel_variance64x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x64_ssse3, + &aom_highbd_12_masked_sub_pixel_variance16x64_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x8_ssse3, + &aom_highbd_12_masked_sub_pixel_variance32x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x32_ssse3, + &aom_highbd_12_masked_sub_pixel_variance8x32_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x4_ssse3, + &aom_highbd_12_masked_sub_pixel_variance16x4_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x16_ssse3, + &aom_highbd_12_masked_sub_pixel_variance4x16_c, AOM_BITS_12), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(SSSE3_C_COMPARE, HighbdMaskedSubPixelVarianceTest, + ::testing::ValuesIn(hbd_sub_pel_var_test)); +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_SSSE3 + +#if HAVE_NEON + +const MaskedSubPixelVarianceParam sub_pel_var_test[] = { + make_tuple(&aom_masked_sub_pixel_variance128x128_neon, + &aom_masked_sub_pixel_variance128x128_c), + make_tuple(&aom_masked_sub_pixel_variance128x64_neon, + &aom_masked_sub_pixel_variance128x64_c), + make_tuple(&aom_masked_sub_pixel_variance64x128_neon, + &aom_masked_sub_pixel_variance64x128_c), + make_tuple(&aom_masked_sub_pixel_variance64x64_neon, + &aom_masked_sub_pixel_variance64x64_c), + 
make_tuple(&aom_masked_sub_pixel_variance64x32_neon, + &aom_masked_sub_pixel_variance64x32_c), + make_tuple(&aom_masked_sub_pixel_variance32x64_neon, + &aom_masked_sub_pixel_variance32x64_c), + make_tuple(&aom_masked_sub_pixel_variance32x32_neon, + &aom_masked_sub_pixel_variance32x32_c), + make_tuple(&aom_masked_sub_pixel_variance32x16_neon, + &aom_masked_sub_pixel_variance32x16_c), + make_tuple(&aom_masked_sub_pixel_variance16x32_neon, + &aom_masked_sub_pixel_variance16x32_c), + make_tuple(&aom_masked_sub_pixel_variance16x16_neon, + &aom_masked_sub_pixel_variance16x16_c), + make_tuple(&aom_masked_sub_pixel_variance16x8_neon, + &aom_masked_sub_pixel_variance16x8_c), + make_tuple(&aom_masked_sub_pixel_variance8x16_neon, + &aom_masked_sub_pixel_variance8x16_c), + make_tuple(&aom_masked_sub_pixel_variance8x8_neon, + &aom_masked_sub_pixel_variance8x8_c), + make_tuple(&aom_masked_sub_pixel_variance8x4_neon, + &aom_masked_sub_pixel_variance8x4_c), + make_tuple(&aom_masked_sub_pixel_variance4x8_neon, + &aom_masked_sub_pixel_variance4x8_c), + make_tuple(&aom_masked_sub_pixel_variance4x4_neon, + &aom_masked_sub_pixel_variance4x4_c), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_masked_sub_pixel_variance64x16_neon, + &aom_masked_sub_pixel_variance64x16_c), + make_tuple(&aom_masked_sub_pixel_variance16x64_neon, + &aom_masked_sub_pixel_variance16x64_c), + make_tuple(&aom_masked_sub_pixel_variance32x8_neon, + &aom_masked_sub_pixel_variance32x8_c), + make_tuple(&aom_masked_sub_pixel_variance8x32_neon, + &aom_masked_sub_pixel_variance8x32_c), + make_tuple(&aom_masked_sub_pixel_variance16x4_neon, + &aom_masked_sub_pixel_variance16x4_c), + make_tuple(&aom_masked_sub_pixel_variance4x16_neon, + &aom_masked_sub_pixel_variance4x16_c), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(NEON_C_COMPARE, MaskedSubPixelVarianceTest, + ::testing::ValuesIn(sub_pel_var_test)); + +#if CONFIG_AV1_HIGHBITDEPTH +const HighbdMaskedSubPixelVarianceParam hbd_sub_pel_var_test_neon[] = { + 
make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x128_neon, + &aom_highbd_8_masked_sub_pixel_variance128x128_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x64_neon, + &aom_highbd_8_masked_sub_pixel_variance128x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x128_neon, + &aom_highbd_8_masked_sub_pixel_variance64x128_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x64_neon, + &aom_highbd_8_masked_sub_pixel_variance64x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x32_neon, + &aom_highbd_8_masked_sub_pixel_variance64x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x64_neon, + &aom_highbd_8_masked_sub_pixel_variance32x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x32_neon, + &aom_highbd_8_masked_sub_pixel_variance32x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x16_neon, + &aom_highbd_8_masked_sub_pixel_variance32x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x32_neon, + &aom_highbd_8_masked_sub_pixel_variance16x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x16_neon, + &aom_highbd_8_masked_sub_pixel_variance16x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x8_neon, + &aom_highbd_8_masked_sub_pixel_variance16x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x16_neon, + &aom_highbd_8_masked_sub_pixel_variance8x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x8_neon, + &aom_highbd_8_masked_sub_pixel_variance8x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x4_neon, + &aom_highbd_8_masked_sub_pixel_variance8x4_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x8_neon, + &aom_highbd_8_masked_sub_pixel_variance4x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x4_neon, + 
&aom_highbd_8_masked_sub_pixel_variance4x4_c, AOM_BITS_8), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x128_neon, + &aom_highbd_10_masked_sub_pixel_variance128x128_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x64_neon, + &aom_highbd_10_masked_sub_pixel_variance128x64_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x128_neon, + &aom_highbd_10_masked_sub_pixel_variance64x128_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x64_neon, + &aom_highbd_10_masked_sub_pixel_variance64x64_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x32_neon, + &aom_highbd_10_masked_sub_pixel_variance64x32_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x64_neon, + &aom_highbd_10_masked_sub_pixel_variance32x64_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x32_neon, + &aom_highbd_10_masked_sub_pixel_variance32x32_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x16_neon, + &aom_highbd_10_masked_sub_pixel_variance32x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x32_neon, + &aom_highbd_10_masked_sub_pixel_variance16x32_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x16_neon, + &aom_highbd_10_masked_sub_pixel_variance16x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x8_neon, + &aom_highbd_10_masked_sub_pixel_variance16x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x16_neon, + &aom_highbd_10_masked_sub_pixel_variance8x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x8_neon, + &aom_highbd_10_masked_sub_pixel_variance8x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x4_neon, + &aom_highbd_10_masked_sub_pixel_variance8x4_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x8_neon, + &aom_highbd_10_masked_sub_pixel_variance4x8_c, 
AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x4_neon, + &aom_highbd_10_masked_sub_pixel_variance4x4_c, AOM_BITS_10), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x128_neon, + &aom_highbd_12_masked_sub_pixel_variance128x128_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x64_neon, + &aom_highbd_12_masked_sub_pixel_variance128x64_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x128_neon, + &aom_highbd_12_masked_sub_pixel_variance64x128_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x64_neon, + &aom_highbd_12_masked_sub_pixel_variance64x64_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x32_neon, + &aom_highbd_12_masked_sub_pixel_variance64x32_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x64_neon, + &aom_highbd_12_masked_sub_pixel_variance32x64_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x32_neon, + &aom_highbd_12_masked_sub_pixel_variance32x32_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x16_neon, + &aom_highbd_12_masked_sub_pixel_variance32x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x32_neon, + &aom_highbd_12_masked_sub_pixel_variance16x32_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x16_neon, + &aom_highbd_12_masked_sub_pixel_variance16x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x8_neon, + &aom_highbd_12_masked_sub_pixel_variance16x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x16_neon, + &aom_highbd_12_masked_sub_pixel_variance8x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x8_neon, + &aom_highbd_12_masked_sub_pixel_variance8x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x4_neon, + &aom_highbd_12_masked_sub_pixel_variance8x4_c, AOM_BITS_12), + 
make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x8_neon, + &aom_highbd_12_masked_sub_pixel_variance4x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x4_neon, + &aom_highbd_12_masked_sub_pixel_variance4x4_c, AOM_BITS_12), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x16_neon, + &aom_highbd_8_masked_sub_pixel_variance64x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x64_neon, + &aom_highbd_8_masked_sub_pixel_variance16x64_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x8_neon, + &aom_highbd_8_masked_sub_pixel_variance32x8_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x32_neon, + &aom_highbd_8_masked_sub_pixel_variance8x32_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x4_neon, + &aom_highbd_8_masked_sub_pixel_variance16x4_c, AOM_BITS_8), + make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x16_neon, + &aom_highbd_8_masked_sub_pixel_variance4x16_c, AOM_BITS_8), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x16_neon, + &aom_highbd_10_masked_sub_pixel_variance64x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x64_neon, + &aom_highbd_10_masked_sub_pixel_variance16x64_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x8_neon, + &aom_highbd_10_masked_sub_pixel_variance32x8_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x32_neon, + &aom_highbd_10_masked_sub_pixel_variance8x32_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x4_neon, + &aom_highbd_10_masked_sub_pixel_variance16x4_c, AOM_BITS_10), + make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x16_neon, + &aom_highbd_10_masked_sub_pixel_variance4x16_c, AOM_BITS_10), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x16_neon, + &aom_highbd_12_masked_sub_pixel_variance64x16_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x64_neon, + 
&aom_highbd_12_masked_sub_pixel_variance16x64_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x8_neon, + &aom_highbd_12_masked_sub_pixel_variance32x8_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x32_neon, + &aom_highbd_12_masked_sub_pixel_variance8x32_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x4_neon, + &aom_highbd_12_masked_sub_pixel_variance16x4_c, AOM_BITS_12), + make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x16_neon, + &aom_highbd_12_masked_sub_pixel_variance4x16_c, AOM_BITS_12), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(NEON_C_COMPARE, HighbdMaskedSubPixelVarianceTest, + ::testing::ValuesIn(hbd_sub_pel_var_test_neon)); + +#endif // CONFIG_AV1_HIGHBITDEPTH + +#endif // HAVE_NEON +} // namespace diff --git a/third_party/aom/test/md5_helper.h b/third_party/aom/test/md5_helper.h new file mode 100644 index 0000000000..69f1ae76b0 --- /dev/null +++ b/third_party/aom/test/md5_helper.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AOM_TEST_MD5_HELPER_H_ +#define AOM_TEST_MD5_HELPER_H_ + +#include "aom/aom_decoder.h" +#include "common/md5_utils.h" + +namespace libaom_test { +class MD5 { + public: + MD5() { MD5Init(&md5_); } + + void Add(const aom_image_t *img) { + for (int plane = 0; plane < 3; ++plane) { + const uint8_t *buf = img->planes[plane]; + // Calculate the width and height to do the md5 check. 
For the chroma + // plane, we never want to round down and thus skip a pixel so if + // we are shifting by 1 (chroma_shift) we add 1 before doing the shift. + // This works only for chroma_shift of 0 and 1. + const int bytes_per_sample = + (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1; + const int h = + plane ? (img->d_h + img->y_chroma_shift) >> img->y_chroma_shift + : img->d_h; + const int w = + (plane ? (img->d_w + img->x_chroma_shift) >> img->x_chroma_shift + : img->d_w) * + bytes_per_sample; + + for (int y = 0; y < h; ++y) { + MD5Update(&md5_, buf, w); + buf += img->stride[plane]; + } + } + } + + void Add(const uint8_t *data, size_t size) { + MD5Update(&md5_, data, static_cast<uint32_t>(size)); + } + + const char *Get() { + static const char hex[16] = { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', + }; + uint8_t tmp[16]; + MD5Context ctx_tmp = md5_; + + MD5Final(tmp, &ctx_tmp); + for (int i = 0; i < 16; i++) { + res_[i * 2 + 0] = hex[tmp[i] >> 4]; + res_[i * 2 + 1] = hex[tmp[i] & 0xf]; + } + res_[32] = 0; + + return res_; + } + + protected: + char res_[33]; + MD5Context md5_; +}; + +} // namespace libaom_test + +#endif // AOM_TEST_MD5_HELPER_H_ diff --git a/third_party/aom/test/metadata_test.cc b/third_party/aom/test/metadata_test.cc new file mode 100644 index 0000000000..9467c29e86 --- /dev/null +++ b/third_party/aom/test/metadata_test.cc @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "aom/aom_codec.h" +#include "aom/aom_image.h" +#include "aom/internal/aom_image_internal.h" +#include "aom_scale/yv12config.h" +#include "av1/encoder/bitstream.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/video_source.h" + +namespace { +const size_t kMetadataPayloadSizeT35 = 24; +// 0xB5 stands for the itut t35 metadata country code for the Unites States +const uint8_t kMetadataPayloadT35[kMetadataPayloadSizeT35] = { + 0xB5, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 +}; + +const size_t kMetadataPayloadSizeCll = 4; +const uint8_t kMetadataPayloadCll[kMetadataPayloadSizeCll] = { 0xB5, 0x01, 0x02, + 0x03 }; + +const size_t kMetadataObuSizeT35 = 28; +const uint8_t kMetadataObuT35[kMetadataObuSizeT35] = { + 0x2A, 0x1A, 0x02, 0xB5, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, + 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x80 +}; +const size_t kMetadataObuSizeMdcv = 28; +const uint8_t kMetadataObuMdcv[kMetadataObuSizeMdcv] = { + 0x2A, 0x1A, 0x02, 0xB5, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, + 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x80 +}; +const size_t kMetadataObuSizeCll = 8; +const uint8_t kMetadataObuCll[kMetadataObuSizeCll] = { 0x2A, 0x06, 0x01, 0xB5, + 0x01, 0x02, 0x03, 0x80 }; + +class MetadataEncodeTest + : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>, + public ::libaom_test::EncoderTest { + protected: + MetadataEncodeTest() : EncoderTest(GET_PARAM(0)) {} + + ~MetadataEncodeTest() override = default; + + void SetUp() override { InitializeConfig(GET_PARAM(1)); } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + 
::libaom_test::Encoder * /*encoder*/) override { + aom_image_t *current_frame = video->img(); + if (current_frame) { + if (current_frame->metadata) aom_img_remove_metadata(current_frame); + ASSERT_EQ(aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_ITUT_T35, + kMetadataPayloadT35, 0, AOM_MIF_ANY_FRAME), + -1); + ASSERT_EQ(aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_ITUT_T35, + nullptr, kMetadataPayloadSizeT35, + AOM_MIF_ANY_FRAME), + -1); + ASSERT_EQ(aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_ITUT_T35, + nullptr, 0, AOM_MIF_ANY_FRAME), + -1); + ASSERT_EQ( + aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_ITUT_T35, + kMetadataPayloadT35, kMetadataPayloadSizeT35, + AOM_MIF_ANY_FRAME), + 0); + + ASSERT_EQ( + aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_HDR_MDCV, + kMetadataPayloadT35, kMetadataPayloadSizeT35, + AOM_MIF_KEY_FRAME), + 0); + + ASSERT_EQ( + aom_img_add_metadata(current_frame, OBU_METADATA_TYPE_HDR_CLL, + kMetadataPayloadCll, kMetadataPayloadSizeCll, + AOM_MIF_KEY_FRAME), + 0); + } + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const size_t bitstream_size = pkt->data.frame.sz; + const uint8_t *bitstream = + static_cast<const uint8_t *>(pkt->data.frame.buf); + // look for valid metadatas in bitstream + bool itut_t35_metadata_found = false; + if (bitstream_size >= kMetadataObuSizeT35) { + for (size_t i = 0; i <= bitstream_size - kMetadataObuSizeT35; ++i) { + if (memcmp(bitstream + i, kMetadataObuT35, kMetadataObuSizeT35) == + 0) { + itut_t35_metadata_found = true; + } + } + } + ASSERT_EQ(itut_t35_metadata_found, 1u); + + // Testing for HDR MDCV metadata + bool hdr_mdcv_metadata_found = false; + if (bitstream_size >= kMetadataObuSizeMdcv) { + for (size_t i = 0; i <= bitstream_size - kMetadataObuSizeMdcv; ++i) { + if (memcmp(bitstream + i, kMetadataObuMdcv, kMetadataObuSizeMdcv) == + 0) { + hdr_mdcv_metadata_found = true; + } + } + } + 
ASSERT_TRUE(hdr_mdcv_metadata_found); + + // Testing for HDR CLL metadata + bool hdr_cll_metadata_found = false; + if (bitstream_size >= kMetadataObuSizeCll) { + for (size_t i = 0; i <= bitstream_size - kMetadataObuSizeCll; ++i) { + if (memcmp(bitstream + i, kMetadataObuCll, kMetadataObuSizeCll) == + 0) { + hdr_cll_metadata_found = true; + } + } + } + ASSERT_TRUE(hdr_cll_metadata_found); + } + } + + void DecompressedFrameHook(const aom_image_t &img, + aom_codec_pts_t /*pts*/) override { + ASSERT_NE(img.metadata, nullptr); + + ASSERT_EQ(img.metadata->sz, 3u); + + for (size_t i = 0; i < img.metadata->sz - 1; ++i) { + ASSERT_EQ(kMetadataPayloadSizeT35, img.metadata->metadata_array[i]->sz); + EXPECT_EQ( + memcmp(kMetadataPayloadT35, img.metadata->metadata_array[i]->payload, + kMetadataPayloadSizeT35), + 0); + } + + ASSERT_EQ(kMetadataPayloadSizeCll, img.metadata->metadata_array[2]->sz); + EXPECT_EQ( + memcmp(kMetadataPayloadCll, img.metadata->metadata_array[2]->payload, + kMetadataPayloadSizeCll), + 0); + } +}; + +TEST_P(MetadataEncodeTest, TestMetadataEncoding) { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 5); + init_flags_ = AOM_CODEC_USE_PSNR; + + cfg_.g_w = 352; + cfg_.g_h = 288; + + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_end_usage = AOM_CBR; + cfg_.kf_mode = AOM_KF_AUTO; + cfg_.g_lag_in_frames = 1; + cfg_.kf_min_dist = cfg_.kf_max_dist = 3000; + // Enable dropped frames. + cfg_.rc_dropframe_thresh = 1; + // Disable error_resilience mode. + cfg_.g_error_resilient = 0; + // Run at low bitrate. 
+ cfg_.rc_target_bitrate = 40; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +AV1_INSTANTIATE_TEST_SUITE(MetadataEncodeTest, + ::testing::Values(::libaom_test::kOnePassGood)); + +} // namespace + +TEST(MetadataTest, MetadataAllocation) { + aom_metadata_t *metadata = + aom_img_metadata_alloc(OBU_METADATA_TYPE_ITUT_T35, kMetadataPayloadT35, + kMetadataPayloadSizeT35, AOM_MIF_ANY_FRAME); + ASSERT_NE(metadata, nullptr); + aom_img_metadata_free(metadata); +} + +TEST(MetadataTest, MetadataArrayAllocation) { + aom_metadata_array_t *metadata_array = aom_img_metadata_array_alloc(2); + ASSERT_NE(metadata_array, nullptr); + + metadata_array->metadata_array[0] = + aom_img_metadata_alloc(OBU_METADATA_TYPE_ITUT_T35, kMetadataPayloadT35, + kMetadataPayloadSizeT35, AOM_MIF_ANY_FRAME); + metadata_array->metadata_array[1] = + aom_img_metadata_alloc(OBU_METADATA_TYPE_ITUT_T35, kMetadataPayloadT35, + kMetadataPayloadSizeT35, AOM_MIF_ANY_FRAME); + + aom_img_metadata_array_free(metadata_array); +} + +TEST(MetadataTest, AddMetadataToImage) { + aom_image_t image; + image.metadata = nullptr; + + ASSERT_EQ(aom_img_add_metadata(&image, OBU_METADATA_TYPE_ITUT_T35, + kMetadataPayloadT35, kMetadataPayloadSizeT35, + AOM_MIF_ANY_FRAME), + 0); + aom_img_metadata_array_free(image.metadata); + EXPECT_EQ(aom_img_add_metadata(nullptr, OBU_METADATA_TYPE_ITUT_T35, + kMetadataPayloadT35, kMetadataPayloadSizeT35, + AOM_MIF_ANY_FRAME), + -1); +} + +TEST(MetadataTest, RemoveMetadataFromImage) { + aom_image_t image; + image.metadata = nullptr; + + ASSERT_EQ(aom_img_add_metadata(&image, OBU_METADATA_TYPE_ITUT_T35, + kMetadataPayloadT35, kMetadataPayloadSizeT35, + AOM_MIF_ANY_FRAME), + 0); + aom_img_remove_metadata(&image); + aom_img_remove_metadata(nullptr); +} + +TEST(MetadataTest, CopyMetadataToFrameBuffer) { + YV12_BUFFER_CONFIG yvBuf; + yvBuf.metadata = nullptr; + + aom_metadata_array_t *metadata_array = aom_img_metadata_array_alloc(1); + ASSERT_NE(metadata_array, nullptr); + + 
metadata_array->metadata_array[0] = + aom_img_metadata_alloc(OBU_METADATA_TYPE_ITUT_T35, kMetadataPayloadT35, + kMetadataPayloadSizeT35, AOM_MIF_ANY_FRAME); + + // Metadata_array + int status = aom_copy_metadata_to_frame_buffer(&yvBuf, metadata_array); + EXPECT_EQ(status, 0); + status = aom_copy_metadata_to_frame_buffer(nullptr, metadata_array); + EXPECT_EQ(status, -1); + aom_img_metadata_array_free(metadata_array); + + // Metadata_array_2 + aom_metadata_array_t *metadata_array_2 = aom_img_metadata_array_alloc(0); + ASSERT_NE(metadata_array_2, nullptr); + status = aom_copy_metadata_to_frame_buffer(&yvBuf, metadata_array_2); + EXPECT_EQ(status, -1); + aom_img_metadata_array_free(metadata_array_2); + + // YV12_BUFFER_CONFIG + status = aom_copy_metadata_to_frame_buffer(&yvBuf, nullptr); + EXPECT_EQ(status, -1); + aom_remove_metadata_from_frame_buffer(&yvBuf); + aom_remove_metadata_from_frame_buffer(nullptr); +} + +TEST(MetadataTest, GetMetadataFromImage) { + aom_image_t image; + image.metadata = nullptr; + + ASSERT_EQ(aom_img_add_metadata(&image, OBU_METADATA_TYPE_ITUT_T35, + kMetadataPayloadT35, kMetadataPayloadSizeT35, + AOM_MIF_ANY_FRAME), + 0); + + EXPECT_EQ(aom_img_get_metadata(nullptr, 0), nullptr); + EXPECT_EQ(aom_img_get_metadata(&image, 1u), nullptr); + EXPECT_EQ(aom_img_get_metadata(&image, 10u), nullptr); + + const aom_metadata_t *metadata = aom_img_get_metadata(&image, 0); + ASSERT_NE(metadata, nullptr); + ASSERT_EQ(metadata->sz, kMetadataPayloadSizeT35); + EXPECT_EQ( + memcmp(kMetadataPayloadT35, metadata->payload, kMetadataPayloadSizeT35), + 0); + + aom_img_metadata_array_free(image.metadata); +} + +TEST(MetadataTest, ReadMetadatasFromImage) { + aom_image_t image; + image.metadata = nullptr; + + uint32_t types[3]; + types[0] = OBU_METADATA_TYPE_ITUT_T35; + types[1] = OBU_METADATA_TYPE_HDR_CLL; + types[2] = OBU_METADATA_TYPE_HDR_MDCV; + + ASSERT_EQ(aom_img_add_metadata(&image, types[0], kMetadataPayloadT35, + kMetadataPayloadSizeT35, AOM_MIF_ANY_FRAME), + 
0); + ASSERT_EQ(aom_img_add_metadata(&image, types[1], kMetadataPayloadT35, + kMetadataPayloadSizeT35, AOM_MIF_KEY_FRAME), + 0); + ASSERT_EQ(aom_img_add_metadata(&image, types[2], kMetadataPayloadT35, + kMetadataPayloadSizeT35, AOM_MIF_KEY_FRAME), + 0); + + size_t number_metadata = aom_img_num_metadata(&image); + ASSERT_EQ(number_metadata, 3u); + for (size_t i = 0; i < number_metadata; ++i) { + const aom_metadata_t *metadata = aom_img_get_metadata(&image, i); + ASSERT_NE(metadata, nullptr); + ASSERT_EQ(metadata->type, types[i]); + ASSERT_EQ(metadata->sz, kMetadataPayloadSizeT35); + EXPECT_EQ( + memcmp(kMetadataPayloadT35, metadata->payload, kMetadataPayloadSizeT35), + 0); + } + aom_img_metadata_array_free(image.metadata); +} diff --git a/third_party/aom/test/metrics_template.html b/third_party/aom/test/metrics_template.html new file mode 100644 index 0000000000..b57c62314a --- /dev/null +++ b/third_party/aom/test/metrics_template.html @@ -0,0 +1,422 @@ +<!DOCTYPE html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<title>Video Codec Test Results</title> +<style type="text/css"> +<!-- Begin 960 reset --> +a,abbr,acronym,address,applet,article,aside,audio,b,big,blockquote,body,canvas,caption,center,cite,c +ode,dd,del,details,dfn,dialog,div,dl,dt,em,embed,fieldset,figcaption,figure,font,footer,form,h1,h2,h +3,h4,h5,h6,header,hgroup,hr,html,i,iframe,img,ins,kbd,label,legend,li,mark,menu,meter,nav,object,ol, +output,p,pre,progress,q,rp,rt,ruby,s,samp,section,small,span,strike,strong,sub,summary,sup,table,tbo +dy,td,tfoot,th,thead,time,tr,tt,u,ul,var,video,xmp{border:0;margin:0;padding:0;font-size:100%}html,b +ody{height:100%}article,aside,details,figcaption,figure,footer,header,hgroup,menu,nav,section{displa +y:block}b,strong{font-weight:bold}img{color:transparent;font-size:0;vertical-align:middle;-ms-interp +olation-mode:bicubic}ol,ul{list-style:none}li{display:list-item}table{border-collapse:collapse;borde 
+r-spacing:0}th,td,caption{font-weight:normal;vertical-align:top;text-align:left}q{quotes:none}q:befo +re,q:after{content:'';content:none}sub,sup,small{font-size:75%}sub,sup{line-height:0;position:relati +ve;vertical-align:baseline}sub{bottom:-0.25em}sup{top:-0.5em}svg{overflow:hidden} +<!-- End 960 reset --> +<!-- Begin 960 text --> +body{font:13px/1.5 'Helvetica Neue',Arial,'Liberation Sans',FreeSans,sans-serif}pre,code{font-family +:'DejaVu Sans Mono',Menlo,Consolas,monospace}hr{border:0 #ccc solid;border-top-width:1px;clear:both; +height:0}h1{font-size:25px}h2{font-size:23px}h3{font-size:21px}h4{font-size:19px}h5{font-size:17px}h +6{font-size:15px}ol{list-style:decimal}ul{list-style:disc}li{margin-left:30px}p,dl,hr,h1,h2,h3,h4,h5 +,h6,ol,ul,pre,table,address,fieldset,figure{margin-bottom:20px} +<!-- End 960 text --> +<!-- Begin 960 grid (fluid variant) + 12 columns, 1152px total width + http://960.gs/ | http://grids.heroku.com/ --> +.container_12{width:92%;margin-left:4%;margin-right:4%}.grid_1,.grid_2,.grid_3,.grid_4,.grid_5,.grid +_6,.grid_7,.grid_8,.grid_9,.grid_10,.grid_11,.grid_12{display:inline;float:left;position:relative;ma +rgin-left:1%;margin-right:1%}.alpha{margin-left:0}.omega{margin-right:0}.container_12 .grid_1{width: +6.333%}.container_12 .grid_2{width:14.667%}.container_12 .grid_3{width:23.0%}.container_12 .grid_4{w +idth:31.333%}.container_12 .grid_5{width:39.667%}.container_12 .grid_6{width:48.0%}.container_12 .gr +id_7{width:56.333%}.container_12 .grid_8{width:64.667%}.container_12 .grid_9{width:73.0%}.container_ +12 .grid_10{width:81.333%}.container_12 .grid_11{width:89.667%}.container_12 .grid_12{width:98.0%}.c +ontainer_12 .prefix_1{padding-left:8.333%}.container_12 .prefix_2{padding-left:16.667%}.container_12 + .prefix_3{padding-left:25.0%}.container_12 .prefix_4{padding-left:33.333%}.container_12 .prefix_5{p +adding-left:41.667%}.container_12 .prefix_6{padding-left:50.0%}.container_12 .prefix_7{padding-left: +58.333%}.container_12 
.prefix_8{padding-left:66.667%}.container_12 .prefix_9{padding-left:75.0%}.con +tainer_12 .prefix_10{padding-left:83.333%}.container_12 .prefix_11{padding-left:91.667%}.container_1 +2 .suffix_1{padding-right:8.333%}.container_12 .suffix_2{padding-right:16.667%}.container_12 .suffix +_3{padding-right:25.0%}.container_12 .suffix_4{padding-right:33.333%}.container_12 .suffix_5{padding +-right:41.667%}.container_12 .suffix_6{padding-right:50.0%}.container_12 .suffix_7{padding-right:58. +333%}.container_12 .suffix_8{padding-right:66.667%}.container_12 .suffix_9{padding-right:75.0%}.cont +ainer_12 .suffix_10{padding-right:83.333%}.container_12 .suffix_11{padding-right:91.667%}.container_ +12 .push_1{left:8.333%}.container_12 .push_2{left:16.667%}.container_12 .push_3{left:25.0%}.containe +r_12 .push_4{left:33.333%}.container_12 .push_5{left:41.667%}.container_12 .push_6{left:50.0%}.conta +iner_12 .push_7{left:58.333%}.container_12 .push_8{left:66.667%}.container_12 .push_9{left:75.0%}.co +ntainer_12 .push_10{left:83.333%}.container_12 .push_11{left:91.667%}.container_12 .pull_1{left:-8.3 +33%}.container_12 .pull_2{left:-16.667%}.container_12 .pull_3{left:-25.0%}.container_12 .pull_4{left +:-33.333%}.container_12 .pull_5{left:-41.667%}.container_12 .pull_6{left:-50.0%}.container_12 .pull_ +7{left:-58.333%}.container_12 .pull_8{left:-66.667%}.container_12 .pull_9{left:-75.0%}.container_12 +.pull_10{left:-83.333%}.container_12 .pull_11{left:-91.667%}.clear{clear:both;display:block;overflow +:hidden;visibility:hidden;width:0;height:0}.clearfix:after{clear:both;content:' ';display:block;font +-size:0;line-height:0;visibility:hidden;width:0;height:0}.clearfix{display:inline-block}* html .clea +rfix{height:1%}.clearfix{display:block} +<!-- End 960 grid --> + +div.metricgraph { + +} + +body { + +} + +div.header { + font-family: Arial, sans-serif; +} + +div.header h2 { + margin: .5em auto; +} + +div.radio { + font-family: Arial, sans-serif; + margin-bottom: 1em; +} + +div.main { 
+ +} + +div.cliplist { + font-family: Arial, sans-serif; + margin-top: 6px; +} + +div.chartarea { + font-family: Arial, sans-serif; +} + +div.indicators { + font-family: Arial, sans-serif; + font-size: 13px; + margin-top: 6px; + min-height: 600px; + background-color: #f7f7f7; +} + +div.indicators div.content { + margin: 1em; +} + +div.indicators div.content h5 { + font-size: 13px; + text-align: center; + margin: 0; +} + +div.indicators div.content ul { + margin-left: 0; + padding-left: 0; + margin-top: 0; +} + +div.indicators div.content ul li { + margin-left: 1.5em; +} + +div.indicators div.content p:first-child { + margin-bottom: .5em; +} + +span.google-visualization-table-sortind { + color: #000; +} +.header-style { + font-weight: bold; + border: 1px solid #fff; + background-color: #ccc; +} + +td.header-style+td { + +} + +.orange-background { + background-color: orange; +} + +.light-gray-background { + background-color: #f0f0f0; +} +</style> +<script type="text/javascript" src="https://www.google.com/jsapi"></script> +<script type="text/javascript"> +var chart_left = 40; +var chart_top = 6; +var chart_height = document.documentElement.clientHeight-100; +var chart_width = "100%"; +ftable='filestable_avg' +var snrs = []; +var filestable_dsnr = []; +var filestable_drate = []; +var filestable_avg = []; + +// Python template code replaces the following 2 lines. 
// Appends one radio button plus label to the #metrics form for every entry of
// the global `metrics` array from index 1 onward. Clicking a radio button
// calls changeColumn() with that index.
function build_metrics_radio() {
  // Look the container up once instead of twice per iteration.
  var container = document.getElementById('metrics');

  // `metric` is declared locally; the original leaked it as an implicit
  // global, which any other script using the same name could clobber.
  for (var metric = 1; metric < metrics.length; metric++) {
    var rb = document.createElement('input');
    var l = document.createElement('label');
    rb.setAttribute('type','radio');
    rb.setAttribute('name','metric');
    // The index is baked into the handler text, so each button keeps its own
    // value regardless of where the loop counter ends up.
    rb.setAttribute('onClick', "changeColumn('"+metric.toString()+"')");
    l.innerHTML = metrics[metric];
    container.appendChild(rb);
    container.appendChild(l);
  }
}
// Compares the point at (row, col) of the current chart view with every other
// metric column and writes the result into the #status element.
//
// For each other column it finds the two data points whose metric values
// bracket the selected metric, linearly interpolates the datarate at which
// that column would reach the selected metric, and reports the percentage
// difference between that projected rate and the selected point's datarate.
//
// row: row index in metricView (0 is a valid row).
// col: column index of the selected series; column 0 holds the datarate, so
//      a missing or zero column is ignored.
function pointDifference(row, col) {
  // Only reject a *missing* row: the original `!row` test also threw away
  // row 0, so the first data point never produced a comparison.
  if (row == null || !col)
    return;

  var cols = metricdata.getNumberOfColumns();
  var rows = metricdata.getNumberOfRows();

  var sel_bitrate = metricView.getValue(row, 0 );
  var sel_metric = metricView.getValue(row, col);

  // An empty cell cannot be formatted or compared; bail out instead of
  // throwing on .toFixed().
  if (sel_bitrate == null || sel_metric == null)
    return;

  var message = '<ul>' + metricView.getColumnLabel(col) +
      ' (' + sel_bitrate.toFixed(0) + ' kbps, ' + sel_metric.toFixed(2) + ')' + ' is ';

  // col 0 is datarate
  for (var i = 1; i < cols; ++i) {

    // Bracketing points for this column. A rate of 0 means "not found"; 999
    // is the starting upper bound for the metric search.
    var metric_greatest_thats_less = 0;
    var rate_greatest_thats_less = 0;
    var metric_smallest_thats_greater = 999;
    var rate_smallest_thats_greater = 0;

    if (i == col)
      continue;

    // Find the lowest metric for the column that's greater than sel_metric and
    // the highest metric for this column that's less than the metric.
    for (var line_count = 0; line_count < rows; ++line_count) {
      var this_metric = metricdata.getValue(line_count, i);
      var this_rate = metricdata.getValue(line_count, 0);
      if (!this_metric)
        continue;

      if (this_metric > metric_greatest_thats_less &&
          this_metric <= sel_metric) {
        metric_greatest_thats_less = this_metric;
        rate_greatest_thats_less = this_rate;
      }
      if (this_metric < metric_smallest_thats_greater &&
          this_metric > sel_metric) {
        metric_smallest_thats_greater = this_metric;
        rate_smallest_thats_greater = this_rate;
      }
    }

    if (rate_smallest_thats_greater == 0 || rate_greatest_thats_less == 0) {
      message = message + " <li> Couldn't find a point on both sides.</li>";
    } else {
      // Linear interpolation of datarate as a function of the metric.
      var metric_slope =
          ( rate_smallest_thats_greater - rate_greatest_thats_less) /
          ( metric_smallest_thats_greater - metric_greatest_thats_less);

      var projected_rate = ( sel_metric - metric_greatest_thats_less) *
          metric_slope + rate_greatest_thats_less;

      var difference = 100 * (projected_rate / sel_bitrate - 1);

      if (difference > 0)
        message = message + "<li> " + difference.toFixed(2) +
                  "% smaller than <em>" +
                  metricdata.getColumnLabel(i) + "</em></li> ";
      else
        // Negate before formatting. `-difference.toFixed(2)` negated the
        // formatted string, turning it back into a number and dropping the
        // fixed two decimals.
        message = message + "<li> " + (-difference).toFixed(2) +
                  "% bigger than <em>" +
                  metricdata.getColumnLabel(i) + "</em></li> ";
    }

  }
  message = message + "</ul>";
  var statusbar = document.getElementById('status');
  statusbar.innerHTML = "<p>" + message + "</p>";
  statusbar.style.display = 'block';
}
/**
 * 'select' handler for the clip table: remembers the row of the last selected
 * item in the global `selected` and redraws the chart for that clip.
 */
function selectBetterHandler() {
  var selection = bettertable.getSelection();
  // Deselecting a row fires 'select' with an empty selection; keep the
  // current clip instead of dereferencing an undefined item.
  if (selection.length == 0)
    return;

  // As before, the last entry of the selection wins.
  var item = selection[selection.length - 1];
  if (item.row == null)
    return;

  selected = item.row;
  query_file();
}
// Scalar reference for the 8x8 min/max kernels: walks the 8x8 block at the
// start of a and b (rows a_stride / b_stride bytes apart) and reports the
// smallest and largest absolute per-pixel difference through min_ret/max_ret.
void reference_minmax(const uint8_t *a, int a_stride, const uint8_t *b,
                      int b_stride, int *min_ret, int *max_ret) {
  int smallest = 255;
  int largest = 0;
  for (int row = 0; row < 8; ++row) {
    const uint8_t *const a_row = a + row * a_stride;
    const uint8_t *const b_row = b + row * b_stride;
    for (int col = 0; col < 8; ++col) {
      const int delta = abs(a_row[col] - b_row[col]);
      smallest = (delta < smallest) ? delta : smallest;
      largest = (delta > largest) ? delta : largest;
    }
  }

  *min_ret = smallest;
  *max_ret = largest;
}
a[64], b[64]; + memset(a, 0, sizeof(a)); + memset(b, 255, sizeof(b)); + b[i] = i; // Set a minimum difference of i. + + int min, max; + API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(255, max); + EXPECT_EQ(i, min); + } +} + +TEST_P(MinMaxTest, MaxValue) { + for (int i = 0; i < 64; i++) { + uint8_t a[64], b[64]; + memset(a, 0, sizeof(a)); + memset(b, 0, sizeof(b)); + b[i] = i; // Set a maximum difference of i. + + int min, max; + API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(i, max); + EXPECT_EQ(0, min); + } +} + +TEST_P(MinMaxTest, CompareReference) { + uint8_t a[64], b[64]; + for (int j = 0; j < 64; j++) { + a[j] = rnd_.Rand8(); + b[j] = rnd_.Rand8(); + } + + int min_ref, max_ref, min, max; + reference_minmax(a, 8, b, 8, &min_ref, &max_ref); + API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(max_ref, max); + EXPECT_EQ(min_ref, min); +} + +TEST_P(MinMaxTest, CompareReferenceAndVaryStride) { + uint8_t a[8 * 64], b[8 * 64]; + for (int i = 0; i < 8 * 64; i++) { + a[i] = rnd_.Rand8(); + b[i] = rnd_.Rand8(); + } + for (int a_stride = 8; a_stride <= 64; a_stride += 8) { + for (int b_stride = 8; b_stride <= 64; b_stride += 8) { + int min_ref, max_ref, min, max; + reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref); + API_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max)); + EXPECT_EQ(max_ref, max) + << "when a_stride = " << a_stride << " and b_stride = " << b_stride; + EXPECT_EQ(min_ref, min) + << "when a_stride = " << a_stride << " and b_stride = " << b_stride; + } + } +} + +#if CONFIG_AV1_HIGHBITDEPTH + +using HBDMinMaxTest = MinMaxTest; + +void highbd_reference_minmax(const uint8_t *a, int a_stride, const uint8_t *b, + int b_stride, int *min_ret, int *max_ret) { + int min = 65535; + int max = 0; + const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a); + const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b); + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + const int 
diff = abs(a_ptr[i * a_stride + j] - b_ptr[i * b_stride + j]); + if (min > diff) min = diff; + if (max < diff) max = diff; + } + } + + *min_ret = min; + *max_ret = max; +} + +TEST_P(HBDMinMaxTest, MinValue) { + uint8_t *a = CONVERT_TO_BYTEPTR( + reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t)))); + uint8_t *b = CONVERT_TO_BYTEPTR( + reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t)))); + for (int i = 0; i < 64; i++) { + aom_memset16(CONVERT_TO_SHORTPTR(a), 0, 64); + aom_memset16(CONVERT_TO_SHORTPTR(b), 65535, 64); + CONVERT_TO_SHORTPTR(b)[i] = i; // Set a minimum difference of i. + + int min, max; + API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(65535, max); + EXPECT_EQ(i, min); + } + aom_free(CONVERT_TO_SHORTPTR(a)); + aom_free(CONVERT_TO_SHORTPTR(b)); +} + +TEST_P(HBDMinMaxTest, MaxValue) { + uint8_t *a = CONVERT_TO_BYTEPTR( + reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t)))); + uint8_t *b = CONVERT_TO_BYTEPTR( + reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t)))); + for (int i = 0; i < 64; i++) { + aom_memset16(CONVERT_TO_SHORTPTR(a), 0, 64); + aom_memset16(CONVERT_TO_SHORTPTR(b), 0, 64); + CONVERT_TO_SHORTPTR(b)[i] = i; // Set a minimum difference of i. 
+ + int min, max; + API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + EXPECT_EQ(i, max); + EXPECT_EQ(0, min); + } + aom_free(CONVERT_TO_SHORTPTR(a)); + aom_free(CONVERT_TO_SHORTPTR(b)); +} + +TEST_P(HBDMinMaxTest, CompareReference) { + uint8_t *a = CONVERT_TO_BYTEPTR( + reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t)))); + uint8_t *b = CONVERT_TO_BYTEPTR( + reinterpret_cast<uint16_t *>(aom_malloc(64 * sizeof(uint16_t)))); + for (int j = 0; j < 64; j++) { + CONVERT_TO_SHORTPTR(a)[j] = rnd_.Rand16(); + CONVERT_TO_SHORTPTR(b)[j] = rnd_.Rand16(); + } + + int min_ref, max_ref, min, max; + highbd_reference_minmax(a, 8, b, 8, &min_ref, &max_ref); + API_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); + aom_free(CONVERT_TO_SHORTPTR(a)); + aom_free(CONVERT_TO_SHORTPTR(b)); + EXPECT_EQ(max_ref, max); + EXPECT_EQ(min_ref, min); +} + +TEST_P(HBDMinMaxTest, CompareReferenceAndVaryStride) { + uint8_t *a = CONVERT_TO_BYTEPTR( + reinterpret_cast<uint16_t *>(aom_malloc((8 * 64) * sizeof(uint16_t)))); + uint8_t *b = CONVERT_TO_BYTEPTR( + reinterpret_cast<uint16_t *>(aom_malloc((8 * 64) * sizeof(uint16_t)))); + for (int i = 0; i < 8 * 64; i++) { + CONVERT_TO_SHORTPTR(a)[i] = rnd_.Rand16(); + CONVERT_TO_SHORTPTR(b)[i] = rnd_.Rand16(); + } + for (int a_stride = 8; a_stride <= 64; a_stride += 8) { + for (int b_stride = 8; b_stride <= 64; b_stride += 8) { + int min_ref, max_ref, min, max; + highbd_reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref); + API_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max)); + EXPECT_EQ(max_ref, max) + << "when a_stride = " << a_stride << " and b_stride = " << b_stride; + EXPECT_EQ(min_ref, min) + << "when a_stride = " << a_stride << " and b_stride = " << b_stride; + } + } + aom_free(CONVERT_TO_SHORTPTR(a)); + aom_free(CONVERT_TO_SHORTPTR(b)); +} +#endif // CONFIG_AV1_HIGHBITDEPTH + +INSTANTIATE_TEST_SUITE_P(C, MinMaxTest, ::testing::Values(&aom_minmax_8x8_c)); +#if CONFIG_AV1_HIGHBITDEPTH 
+INSTANTIATE_TEST_SUITE_P(C, HBDMinMaxTest, + ::testing::Values(&aom_highbd_minmax_8x8_c)); +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, HBDMinMaxTest, + ::testing::Values(&aom_highbd_minmax_8x8_neon)); +#endif +#endif + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, MinMaxTest, + ::testing::Values(&aom_minmax_8x8_sse2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, MinMaxTest, + ::testing::Values(&aom_minmax_8x8_neon)); +#endif +} // namespace diff --git a/third_party/aom/test/monochrome_test.cc b/third_party/aom/test/monochrome_test.cc new file mode 100644 index 0000000000..f22b5fe0f2 --- /dev/null +++ b/third_party/aom/test/monochrome_test.cc @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <climits> +#include <vector> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/video_source.h" +#include "test/util.h" + +namespace { + +const unsigned int kCqLevel = 18; +const double kMaxPsnr = 100.0; + +// kPsnrThreshold represents the psnr threshold used to validate the quality of +// the first frame. The indices correspond to one/two-pass, allintra and +// realtime encoding modes. +const double kPsnrThreshold[3] = { 29.0, 41.5, 41.5 }; + +// kPsnrFluctuation represents the maximum allowed psnr fluctuation w.r.t first +// frame. 
The indices correspond to one/two-pass, allintra and realtime +// encoding modes. +const double kPsnrFluctuation[3] = { 2.5, 0.3, 16.0 }; + +class MonochromeTest + : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int, + int>, + public ::libaom_test::EncoderTest { + protected: + MonochromeTest() + : EncoderTest(GET_PARAM(0)), lossless_(GET_PARAM(2)), + frame0_psnr_y_(0.0) {} + + ~MonochromeTest() override = default; + + void SetUp() override { InitializeConfig(GET_PARAM(1)); } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, GET_PARAM(3)); + if (mode_ == ::libaom_test::kAllIntra) { + encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel); + } + if (lossless_) { + encoder->Control(AV1E_SET_LOSSLESS, 1); + } + } + } + + void DecompressedFrameHook(const aom_image_t &img, + aom_codec_pts_t pts) override { + (void)pts; + + // Get value of top-left corner pixel of U plane + int chroma_value = img.planes[AOM_PLANE_U][0]; + + bool is_chroma_constant = + ComparePlaneToValue(img, AOM_PLANE_U, chroma_value) && + ComparePlaneToValue(img, AOM_PLANE_V, chroma_value); + + // Chroma planes should be constant + EXPECT_TRUE(is_chroma_constant); + + // Monochrome flag on image should be set + EXPECT_EQ(img.monochrome, 1); + + chroma_value_list_.push_back(chroma_value); + } + + // Returns true if all pixels on the plane are equal to value, and returns + // false otherwise. 
+ bool ComparePlaneToValue(const aom_image_t &img, const int plane, + const int value) { + const int w = aom_img_plane_width(&img, plane); + const int h = aom_img_plane_height(&img, plane); + const uint8_t *const buf = img.planes[plane]; + const int stride = img.stride[plane]; + + for (int r = 0; r < h; ++r) { + for (int c = 0; c < w; ++c) { + if (buf[r * stride + c] != value) return false; + } + } + return true; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + // Check average PSNR value is >= 100 db in case of lossless encoding. + if (lossless_) { + EXPECT_GE(pkt->data.psnr.psnr[0], kMaxPsnr); + return; + } + const int psnr_index = (mode_ == ::libaom_test::kRealTime) ? 2 + : (mode_ == ::libaom_test::kAllIntra) ? 1 + : 0; + // Check that the initial Y PSNR value is 'high enough', and check that + // subsequent Y PSNR values are 'close' to this initial value. + if (frame0_psnr_y_ == 0.0) { + frame0_psnr_y_ = pkt->data.psnr.psnr[1]; + EXPECT_GT(frame0_psnr_y_, kPsnrThreshold[psnr_index]); + } + EXPECT_NEAR(pkt->data.psnr.psnr[1], frame0_psnr_y_, + kPsnrFluctuation[psnr_index]); + } + + int lossless_; + std::vector<int> chroma_value_list_; + double frame0_psnr_y_; +}; + +TEST_P(MonochromeTest, TestMonochromeEncoding) { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 5); + + init_flags_ = AOM_CODEC_USE_PSNR; + + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 1; + cfg_.kf_min_dist = cfg_.kf_max_dist = 3000; + // Enable dropped frames. + cfg_.rc_dropframe_thresh = 1; + // Run at low bitrate. 
+ cfg_.rc_target_bitrate = 40; + // Set monochrome encoding flag + cfg_.monochrome = 1; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // Check that the chroma planes are equal across all frames + std::vector<int>::const_iterator iter = chroma_value_list_.begin(); + int initial_chroma_value = *iter; + for (; iter != chroma_value_list_.end(); ++iter) { + // Check that all decoded frames have the same constant chroma planes. + EXPECT_EQ(*iter, initial_chroma_value); + } +} + +class MonochromeAllIntraTest : public MonochromeTest {}; + +TEST_P(MonochromeAllIntraTest, TestMonochromeEncoding) { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 5); + init_flags_ = AOM_CODEC_USE_PSNR; + // Set monochrome encoding flag + cfg_.monochrome = 1; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // Check that the chroma planes are equal across all frames + std::vector<int>::const_iterator iter = chroma_value_list_.begin(); + int initial_chroma_value = *iter; + for (; iter != chroma_value_list_.end(); ++iter) { + // Check that all decoded frames have the same constant chroma planes. + EXPECT_EQ(*iter, initial_chroma_value); + } +} + +class MonochromeRealtimeTest : public MonochromeTest {}; + +TEST_P(MonochromeRealtimeTest, TestMonochromeEncoding) { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 30); + init_flags_ = AOM_CODEC_USE_PSNR; + // Set monochrome encoding flag + cfg_.monochrome = 1; + // Run at low bitrate. + cfg_.rc_target_bitrate = 40; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // Check that the chroma planes are equal across all frames + std::vector<int>::const_iterator iter = chroma_value_list_.begin(); + int initial_chroma_value = *iter; + for (; iter != chroma_value_list_.end(); ++iter) { + // Check that all decoded frames have the same constant chroma planes. 
+ EXPECT_EQ(*iter, initial_chroma_value); + } +} + +AV1_INSTANTIATE_TEST_SUITE(MonochromeTest, + ::testing::Values(::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood), + ::testing::Values(0), // lossless + ::testing::Values(0)); // cpu_used + +AV1_INSTANTIATE_TEST_SUITE(MonochromeAllIntraTest, + ::testing::Values(::libaom_test::kAllIntra), + ::testing::Values(0, 1), // lossless + ::testing::Values(6, 9)); // cpu_used + +AV1_INSTANTIATE_TEST_SUITE(MonochromeRealtimeTest, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Values(0), // lossless + ::testing::Values(6, 8, 10)); // cpu_used + +} // namespace diff --git a/third_party/aom/test/motion_vector_test.cc b/third_party/aom/test/motion_vector_test.cc new file mode 100644 index 0000000000..4fc8d53d95 --- /dev/null +++ b/third_party/aom/test/motion_vector_test.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <memory> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/yuv_video_source.h" + +namespace { +#define MAX_EXTREME_MV 1 +#define MIN_EXTREME_MV 2 + +// Encoding modes +const libaom_test::TestMode kEncodingModeVectors[] = { + ::libaom_test::kTwoPassGood, + ::libaom_test::kOnePassGood, +}; + +// Encoding speeds +const int kCpuUsedVectors[] = { 1, 5 }; + +// MV test modes: 1 - always use maximum MV; 2 - always use minimum MV. 
+const int kMVTestModes[] = { MAX_EXTREME_MV, MIN_EXTREME_MV }; + +class MotionVectorTestLarge + : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int, + int>, + public ::libaom_test::EncoderTest { + protected: + MotionVectorTestLarge() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), mv_test_mode_(GET_PARAM(3)) {} + + ~MotionVectorTestLarge() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + cfg_.g_lag_in_frames = 3; + } else { + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + } + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST, mv_test_mode_); + if (encoding_mode_ != ::libaom_test::kRealTime) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + } + + libaom_test::TestMode encoding_mode_; + int cpu_used_; + int mv_test_mode_; +}; + +TEST_P(MotionVectorTestLarge, OverallTest) { + int width = 3840; + int height = 2160; + + // Reduce the test clip's resolution while testing on 32-bit system. 
+ if (sizeof(void *) == 4) { + width = 2048; + height = 360; + } + + cfg_.rc_target_bitrate = 24000; + cfg_.g_profile = 0; + init_flags_ = AOM_CODEC_USE_PSNR; + + std::unique_ptr<libaom_test::VideoSource> video; + video.reset(new libaom_test::YUVVideoSource( + "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, width, height, 30, 1, 0, 3)); + + ASSERT_NE(video, nullptr); + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); +} + +AV1_INSTANTIATE_TEST_SUITE(MotionVectorTestLarge, + ::testing::ValuesIn(kEncodingModeVectors), + ::testing::ValuesIn(kCpuUsedVectors), + ::testing::ValuesIn(kMVTestModes)); +} // namespace diff --git a/third_party/aom/test/mv_cost_test.cc b/third_party/aom/test/mv_cost_test.cc new file mode 100644 index 0000000000..73d56665bf --- /dev/null +++ b/third_party/aom/test/mv_cost_test.cc @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2022, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "av1/encoder/cost.h" +#include "av1/encoder/encodemv.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +void ReferenceBuildNmvComponentCostTable(int *mvcost, + const nmv_component *const mvcomp, + MvSubpelPrecision precision) { + int i, v; + int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE]; + int bits_cost[MV_OFFSET_BITS][2]; + int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE], fp_cost[MV_FP_SIZE]; + int class0_hp_cost[2], hp_cost[2]; + av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, nullptr); + av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, nullptr); + av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, nullptr); + for (i = 0; i < MV_OFFSET_BITS; ++i) { + av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], nullptr); + } + for (i = 0; i < CLASS0_SIZE; ++i) + av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i], + nullptr); + av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, nullptr); + if (precision > MV_SUBPEL_LOW_PRECISION) { + av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, nullptr); + av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, nullptr); + } + mvcost[0] = 0; + for (v = 1; v <= MV_MAX; ++v) { + int z, c, o, d, e, f, cost = 0; + z = v - 1; + c = av1_get_mv_class(z, &o); + cost += class_cost[c]; + d = (o >> 3); /* int mv data */ + f = (o >> 1) & 3; /* fractional pel mv data */ + e = (o & 1); /* high precision mv data */ + if (c == MV_CLASS_0) { + cost += class0_cost[d]; + } else { + const int b = c + CLASS0_BITS - 1; /* number of bits */ + for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)]; + } + if (precision > MV_SUBPEL_NONE) { + if (c == MV_CLASS_0) { + cost += class0_fp_cost[d][f]; + } else { + cost += fp_cost[f]; + } + if (precision > MV_SUBPEL_LOW_PRECISION) { + if (c == MV_CLASS_0) { + cost += class0_hp_cost[e]; + } else { + cost += hp_cost[e]; + } + } + } + mvcost[v] = cost + sign_cost[0]; + mvcost[-v] = cost 
+ sign_cost[1]; + } +} + +// Test using the default context, except for sign +static const nmv_component kTestComponentContext = { + { AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740, 32757, 32762, + 32767) }, // class_cdf // fp + { { AOM_CDF4(16384, 24576, 26624) }, + { AOM_CDF4(12288, 21248, 24128) } }, // class0_fp_cdf + { AOM_CDF4(8192, 17408, 21248) }, // fp_cdf + { AOM_CDF2(70 * 128) }, // sign_cdf + { AOM_CDF2(160 * 128) }, // class0_hp_cdf + { AOM_CDF2(128 * 128) }, // hp_cdf + { AOM_CDF2(216 * 128) }, // class0_cdf + { { AOM_CDF2(128 * 136) }, + { AOM_CDF2(128 * 140) }, + { AOM_CDF2(128 * 148) }, + { AOM_CDF2(128 * 160) }, + { AOM_CDF2(128 * 176) }, + { AOM_CDF2(128 * 192) }, + { AOM_CDF2(128 * 224) }, + { AOM_CDF2(128 * 234) }, + { AOM_CDF2(128 * 234) }, + { AOM_CDF2(128 * 240) } }, // bits_cdf +}; + +void TestMvComponentCostTable(MvSubpelPrecision precision) { + std::unique_ptr<int[]> mvcost_ref_buf(new int[MV_VALS]); + std::unique_ptr<int[]> mvcost_buf(new int[MV_VALS]); + int *mvcost_ref = mvcost_ref_buf.get() + MV_MAX; + int *mvcost = mvcost_buf.get() + MV_MAX; + + ReferenceBuildNmvComponentCostTable(mvcost_ref, &kTestComponentContext, + precision); + av1_build_nmv_component_cost_table(mvcost, &kTestComponentContext, precision); + + for (int v = 0; v <= MV_MAX; ++v) { + ASSERT_EQ(mvcost_ref[v], mvcost[v]) << "v = " << v; + ASSERT_EQ(mvcost_ref[-v], mvcost[-v]) << "v = " << v; + } +} + +TEST(MvCostTest, BuildMvComponentCostTableTest1) { + TestMvComponentCostTable(MV_SUBPEL_NONE); +} + +TEST(MvCostTest, BuildMvComponentCostTableTest2) { + TestMvComponentCostTable(MV_SUBPEL_LOW_PRECISION); +} + +TEST(MvCostTest, BuildMvComponentCostTableTest3) { + TestMvComponentCostTable(MV_SUBPEL_HIGH_PRECISION); +} + +} // namespace
\ No newline at end of file diff --git a/third_party/aom/test/noise_model_test.cc b/third_party/aom/test/noise_model_test.cc new file mode 100644 index 0000000000..b3edcc218e --- /dev/null +++ b/third_party/aom/test/noise_model_test.cc @@ -0,0 +1,1372 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <limits.h> +#include <math.h> +#include <algorithm> +#include <vector> + +#include "aom_dsp/noise_model.h" +#include "aom_dsp/noise_util.h" +#include "config/aom_dsp_rtcd.h" +#include "test/acm_random.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +// Return normally distrbuted values with standard deviation of sigma. +double randn(libaom_test::ACMRandom *random, double sigma) { + while (true) { + const double u = 2.0 * ((double)random->Rand31() / + testing::internal::Random::kMaxRange) - + 1.0; + const double v = 2.0 * ((double)random->Rand31() / + testing::internal::Random::kMaxRange) - + 1.0; + const double s = u * u + v * v; + if (s > 0 && s < 1) { + return sigma * (u * sqrt(-2.0 * log(s) / s)); + } + } +} + +// Synthesizes noise using the auto-regressive filter of the given lag, +// with the provided n coefficients sampled at the given coords. 
+void noise_synth(libaom_test::ACMRandom *random, int lag, int n, + const int (*coords)[2], const double *coeffs, double *data, + int w, int h) { + const int pad_size = 3 * lag; + const int padded_w = w + pad_size; + const int padded_h = h + pad_size; + int x = 0, y = 0; + std::vector<double> padded(padded_w * padded_h); + + for (y = 0; y < padded_h; ++y) { + for (x = 0; x < padded_w; ++x) { + padded[y * padded_w + x] = randn(random, 1.0); + } + } + for (y = lag; y < padded_h; ++y) { + for (x = lag; x < padded_w; ++x) { + double sum = 0; + int i = 0; + for (i = 0; i < n; ++i) { + const int dx = coords[i][0]; + const int dy = coords[i][1]; + sum += padded[(y + dy) * padded_w + (x + dx)] * coeffs[i]; + } + padded[y * padded_w + x] += sum; + } + } + // Copy over the padded rows to the output + for (y = 0; y < h; ++y) { + memcpy(data + y * w, &padded[0] + y * padded_w, sizeof(*data) * w); + } +} + +std::vector<float> get_noise_psd(double *noise, int width, int height, + int block_size) { + float *block = + (float *)aom_memalign(32, block_size * block_size * sizeof(block)); + std::vector<float> psd(block_size * block_size); + if (block == nullptr) { + EXPECT_NE(block, nullptr); + return psd; + } + int num_blocks = 0; + struct aom_noise_tx_t *tx = aom_noise_tx_malloc(block_size); + if (tx == nullptr) { + EXPECT_NE(tx, nullptr); + return psd; + } + for (int y = 0; y <= height - block_size; y += block_size / 2) { + for (int x = 0; x <= width - block_size; x += block_size / 2) { + for (int yy = 0; yy < block_size; ++yy) { + for (int xx = 0; xx < block_size; ++xx) { + block[yy * block_size + xx] = (float)noise[(y + yy) * width + x + xx]; + } + } + aom_noise_tx_forward(tx, &block[0]); + aom_noise_tx_add_energy(tx, &psd[0]); + num_blocks++; + } + } + for (int yy = 0; yy < block_size; ++yy) { + for (int xx = 0; xx <= block_size / 2; ++xx) { + psd[yy * block_size + xx] /= num_blocks; + } + } + // Fill in the data that is missing due to symmetries + for (int xx = 1; xx < 
block_size / 2; ++xx) { + psd[(block_size - xx)] = psd[xx]; + } + for (int yy = 1; yy < block_size; ++yy) { + for (int xx = 1; xx < block_size / 2; ++xx) { + psd[(block_size - yy) * block_size + (block_size - xx)] = + psd[yy * block_size + xx]; + } + } + aom_noise_tx_free(tx); + aom_free(block); + return psd; +} + +} // namespace + +TEST(NoiseStrengthSolver, GetCentersTwoBins) { + aom_noise_strength_solver_t solver; + aom_noise_strength_solver_init(&solver, 2, 8); + EXPECT_NEAR(0, aom_noise_strength_solver_get_center(&solver, 0), 1e-5); + EXPECT_NEAR(255, aom_noise_strength_solver_get_center(&solver, 1), 1e-5); + aom_noise_strength_solver_free(&solver); +} + +TEST(NoiseStrengthSolver, GetCentersTwoBins10bit) { + aom_noise_strength_solver_t solver; + aom_noise_strength_solver_init(&solver, 2, 10); + EXPECT_NEAR(0, aom_noise_strength_solver_get_center(&solver, 0), 1e-5); + EXPECT_NEAR(1023, aom_noise_strength_solver_get_center(&solver, 1), 1e-5); + aom_noise_strength_solver_free(&solver); +} + +TEST(NoiseStrengthSolver, GetCenters256Bins) { + const int num_bins = 256; + aom_noise_strength_solver_t solver; + aom_noise_strength_solver_init(&solver, num_bins, 8); + + for (int i = 0; i < 256; ++i) { + EXPECT_NEAR(i, aom_noise_strength_solver_get_center(&solver, i), 1e-5); + } + aom_noise_strength_solver_free(&solver); +} + +// Tests that the noise strength solver returns the identity transform when +// given identity-like constraints. +TEST(NoiseStrengthSolver, ObserveIdentity) { + const int num_bins = 256; + aom_noise_strength_solver_t solver; + ASSERT_EQ(1, aom_noise_strength_solver_init(&solver, num_bins, 8)); + + // We have to add a big more strength to constraints at the boundary to + // overcome any regularization. 
+ for (int j = 0; j < 5; ++j) { + aom_noise_strength_solver_add_measurement(&solver, 0, 0); + aom_noise_strength_solver_add_measurement(&solver, 255, 255); + } + for (int i = 0; i < 256; ++i) { + aom_noise_strength_solver_add_measurement(&solver, i, i); + } + EXPECT_EQ(1, aom_noise_strength_solver_solve(&solver)); + for (int i = 2; i < num_bins - 2; ++i) { + EXPECT_NEAR(i, solver.eqns.x[i], 0.1); + } + + aom_noise_strength_lut_t lut; + EXPECT_EQ(1, aom_noise_strength_solver_fit_piecewise(&solver, 2, &lut)); + + ASSERT_EQ(2, lut.num_points); + EXPECT_NEAR(0.0, lut.points[0][0], 1e-5); + EXPECT_NEAR(0.0, lut.points[0][1], 0.5); + EXPECT_NEAR(255.0, lut.points[1][0], 1e-5); + EXPECT_NEAR(255.0, lut.points[1][1], 0.5); + + aom_noise_strength_lut_free(&lut); + aom_noise_strength_solver_free(&solver); +} + +TEST(NoiseStrengthSolver, SimplifiesCurve) { + const int num_bins = 256; + aom_noise_strength_solver_t solver; + EXPECT_EQ(1, aom_noise_strength_solver_init(&solver, num_bins, 8)); + + // Create a parabolic input + for (int i = 0; i < 256; ++i) { + const double x = (i - 127.5) / 63.5; + aom_noise_strength_solver_add_measurement(&solver, i, x * x); + } + EXPECT_EQ(1, aom_noise_strength_solver_solve(&solver)); + + // First try to fit an unconstrained lut + aom_noise_strength_lut_t lut; + EXPECT_EQ(1, aom_noise_strength_solver_fit_piecewise(&solver, -1, &lut)); + ASSERT_LE(20, lut.num_points); + aom_noise_strength_lut_free(&lut); + + // Now constrain the maximum number of points + const int kMaxPoints = 9; + EXPECT_EQ(1, + aom_noise_strength_solver_fit_piecewise(&solver, kMaxPoints, &lut)); + ASSERT_EQ(kMaxPoints, lut.num_points); + + // Check that the input parabola is still well represented + EXPECT_NEAR(0.0, lut.points[0][0], 1e-5); + EXPECT_NEAR(4.0, lut.points[0][1], 0.1); + for (int i = 1; i < lut.num_points - 1; ++i) { + const double x = (lut.points[i][0] - 128.) 
/ 64.; + EXPECT_NEAR(x * x, lut.points[i][1], 0.1); + } + EXPECT_NEAR(255.0, lut.points[kMaxPoints - 1][0], 1e-5); + + EXPECT_NEAR(4.0, lut.points[kMaxPoints - 1][1], 0.1); + aom_noise_strength_lut_free(&lut); + aom_noise_strength_solver_free(&solver); +} + +TEST(NoiseStrengthLut, LutInitNegativeOrZeroSize) { + aom_noise_strength_lut_t lut; + ASSERT_FALSE(aom_noise_strength_lut_init(&lut, -1)); + ASSERT_FALSE(aom_noise_strength_lut_init(&lut, 0)); +} + +TEST(NoiseStrengthLut, LutEvalSinglePoint) { + aom_noise_strength_lut_t lut; + ASSERT_TRUE(aom_noise_strength_lut_init(&lut, 1)); + ASSERT_EQ(1, lut.num_points); + lut.points[0][0] = 0; + lut.points[0][1] = 1; + EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, -1)); + EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, 0)); + EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, 1)); + aom_noise_strength_lut_free(&lut); +} + +TEST(NoiseStrengthLut, LutEvalMultiPointInterp) { + const double kEps = 1e-5; + aom_noise_strength_lut_t lut; + ASSERT_TRUE(aom_noise_strength_lut_init(&lut, 4)); + ASSERT_EQ(4, lut.num_points); + + lut.points[0][0] = 0; + lut.points[0][1] = 0; + + lut.points[1][0] = 1; + lut.points[1][1] = 1; + + lut.points[2][0] = 2; + lut.points[2][1] = 1; + + lut.points[3][0] = 100; + lut.points[3][1] = 1001; + + // Test lower boundary + EXPECT_EQ(0, aom_noise_strength_lut_eval(&lut, -1)); + EXPECT_EQ(0, aom_noise_strength_lut_eval(&lut, 0)); + + // Test first part that should be identity + EXPECT_NEAR(0.25, aom_noise_strength_lut_eval(&lut, 0.25), kEps); + EXPECT_NEAR(0.75, aom_noise_strength_lut_eval(&lut, 0.75), kEps); + + // This is a constant section (should evaluate to 1) + EXPECT_NEAR(1.0, aom_noise_strength_lut_eval(&lut, 1.25), kEps); + EXPECT_NEAR(1.0, aom_noise_strength_lut_eval(&lut, 1.75), kEps); + + // Test interpolation between two non-zero y coords. 
+ EXPECT_NEAR(1, aom_noise_strength_lut_eval(&lut, 2), kEps); + EXPECT_NEAR(251, aom_noise_strength_lut_eval(&lut, 26.5), kEps); + EXPECT_NEAR(751, aom_noise_strength_lut_eval(&lut, 75.5), kEps); + + // Test upper boundary + EXPECT_EQ(1001, aom_noise_strength_lut_eval(&lut, 100)); + EXPECT_EQ(1001, aom_noise_strength_lut_eval(&lut, 101)); + + aom_noise_strength_lut_free(&lut); +} + +TEST(NoiseModel, InitSuccessWithValidSquareShape) { + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 2, 8, 0 }; + aom_noise_model_t model; + + EXPECT_TRUE(aom_noise_model_init(&model, params)); + + const int kNumCoords = 12; + const int kCoords[][2] = { { -2, -2 }, { -1, -2 }, { 0, -2 }, { 1, -2 }, + { 2, -2 }, { -2, -1 }, { -1, -1 }, { 0, -1 }, + { 1, -1 }, { 2, -1 }, { -2, 0 }, { -1, 0 } }; + EXPECT_EQ(kNumCoords, model.n); + for (int i = 0; i < kNumCoords; ++i) { + const int *coord = kCoords[i]; + EXPECT_EQ(coord[0], model.coords[i][0]); + EXPECT_EQ(coord[1], model.coords[i][1]); + } + aom_noise_model_free(&model); +} + +TEST(NoiseModel, InitSuccessWithValidDiamondShape) { + aom_noise_model_t model; + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_DIAMOND, 2, 8, 0 }; + EXPECT_TRUE(aom_noise_model_init(&model, params)); + EXPECT_EQ(6, model.n); + const int kNumCoords = 6; + const int kCoords[][2] = { { 0, -2 }, { -1, -1 }, { 0, -1 }, + { 1, -1 }, { -2, 0 }, { -1, 0 } }; + EXPECT_EQ(kNumCoords, model.n); + for (int i = 0; i < kNumCoords; ++i) { + const int *coord = kCoords[i]; + EXPECT_EQ(coord[0], model.coords[i][0]); + EXPECT_EQ(coord[1], model.coords[i][1]); + } + aom_noise_model_free(&model); +} + +TEST(NoiseModel, InitFailsWithTooLargeLag) { + aom_noise_model_t model; + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 10, 8, 0 }; + EXPECT_FALSE(aom_noise_model_init(&model, params)); + aom_noise_model_free(&model); +} + +TEST(NoiseModel, InitFailsWithTooSmallLag) { + aom_noise_model_t model; + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 
0, 8, 0 }; + EXPECT_FALSE(aom_noise_model_init(&model, params)); + aom_noise_model_free(&model); +} + +TEST(NoiseModel, InitFailsWithInvalidShape) { + aom_noise_model_t model; + aom_noise_model_params_t params = { aom_noise_shape(100), 3, 8, 0 }; + EXPECT_FALSE(aom_noise_model_init(&model, params)); + aom_noise_model_free(&model); +} + +TEST(NoiseModel, InitFailsWithInvalidBitdepth) { + aom_noise_model_t model; + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 2, 8, 0 }; + for (int i = 0; i <= 32; ++i) { + params.bit_depth = i; + if (i == 8 || i == 10 || i == 12) { + EXPECT_TRUE(aom_noise_model_init(&model, params)) << "bit_depth: " << i; + aom_noise_model_free(&model); + } else { + EXPECT_FALSE(aom_noise_model_init(&model, params)) << "bit_depth: " << i; + } + } + params.bit_depth = INT_MAX; + EXPECT_FALSE(aom_noise_model_init(&model, params)); +} + +// A container template class to hold a data type and extra arguments. +// All of these args are bundled into one struct so that we can use +// parameterized tests on combinations of supported data types +// (uint8_t and uint16_t) and bit depths (8, 10, 12). +template <typename T, int bit_depth, bool use_highbd> +struct BitDepthParams { + typedef T data_type_t; + static const int kBitDepth = bit_depth; + static const bool kUseHighBD = use_highbd; +}; + +template <typename T> +class FlatBlockEstimatorTest : public ::testing::Test, public T { + public: + void SetUp() override { random_.Reset(171); } + typedef std::vector<typename T::data_type_t> VecType; + VecType data_; + libaom_test::ACMRandom random_; +}; + +TYPED_TEST_SUITE_P(FlatBlockEstimatorTest); + +TYPED_TEST_P(FlatBlockEstimatorTest, ExtractBlock) { + const int kBlockSize = 16; + aom_flat_block_finder_t flat_block_finder; + ASSERT_EQ(1, aom_flat_block_finder_init(&flat_block_finder, kBlockSize, + this->kBitDepth, this->kUseHighBD)); + const double normalization = flat_block_finder.normalization; + + // Test with an image of more than one block. 
+ const int h = 2 * kBlockSize; + const int w = 2 * kBlockSize; + const int stride = 2 * kBlockSize; + this->data_.resize(h * stride, 128); + + // Set up the (0,0) block to be a plane and the (0,1) block to be a + // checkerboard + const int shift = this->kBitDepth - 8; + for (int y = 0; y < kBlockSize; ++y) { + for (int x = 0; x < kBlockSize; ++x) { + this->data_[y * stride + x] = (-y + x + 128) << shift; + this->data_[y * stride + x + kBlockSize] = + ((x % 2 + y % 2) % 2 ? 128 - 20 : 128 + 20) << shift; + } + } + std::vector<double> block(kBlockSize * kBlockSize, 1); + std::vector<double> plane(kBlockSize * kBlockSize, 1); + + // The block data should be a constant (zero) and the rest of the plane + // trend is covered in the plane data. + aom_flat_block_finder_extract_block(&flat_block_finder, + (uint8_t *)&this->data_[0], w, h, stride, + 0, 0, &plane[0], &block[0]); + for (int y = 0; y < kBlockSize; ++y) { + for (int x = 0; x < kBlockSize; ++x) { + EXPECT_NEAR(0, block[y * kBlockSize + x], 1e-5); + EXPECT_NEAR((double)(this->data_[y * stride + x]) / normalization, + plane[y * kBlockSize + x], 1e-5); + } + } + + // The plane trend is a constant, and the block is a zero mean checkerboard. 
+ aom_flat_block_finder_extract_block(&flat_block_finder, + (uint8_t *)&this->data_[0], w, h, stride, + kBlockSize, 0, &plane[0], &block[0]); + const int mid = 128 << shift; + for (int y = 0; y < kBlockSize; ++y) { + for (int x = 0; x < kBlockSize; ++x) { + EXPECT_NEAR(((double)this->data_[y * stride + x + kBlockSize] - mid) / + normalization, + block[y * kBlockSize + x], 1e-5); + EXPECT_NEAR(mid / normalization, plane[y * kBlockSize + x], 1e-5); + } + } + aom_flat_block_finder_free(&flat_block_finder); +} + +TYPED_TEST_P(FlatBlockEstimatorTest, FindFlatBlocks) { + const int kBlockSize = 32; + aom_flat_block_finder_t flat_block_finder; + ASSERT_EQ(1, aom_flat_block_finder_init(&flat_block_finder, kBlockSize, + this->kBitDepth, this->kUseHighBD)); + + const int num_blocks_w = 8; + const int h = kBlockSize; + const int w = kBlockSize * num_blocks_w; + const int stride = w; + this->data_.resize(h * stride, 128); + std::vector<uint8_t> flat_blocks(num_blocks_w, 0); + + const int shift = this->kBitDepth - 8; + for (int y = 0; y < kBlockSize; ++y) { + for (int x = 0; x < kBlockSize; ++x) { + // Block 0 (not flat): constant doesn't have enough variance to qualify + this->data_[y * stride + x + 0 * kBlockSize] = 128 << shift; + + // Block 1 (not flat): too high of variance is hard to validate as flat + this->data_[y * stride + x + 1 * kBlockSize] = + ((uint8_t)(128 + randn(&this->random_, 5))) << shift; + + // Block 2 (flat): slight checkerboard added to constant + const int check = (x % 2 + y % 2) % 2 ? -2 : 2; + this->data_[y * stride + x + 2 * kBlockSize] = (128 + check) << shift; + + // Block 3 (flat): planar block with checkerboard pattern is also flat + this->data_[y * stride + x + 3 * kBlockSize] = + (y * 2 - x / 2 + 128 + check) << shift; + + // Block 4 (flat): gaussian random with standard deviation 1. 
+ this->data_[y * stride + x + 4 * kBlockSize] = + ((uint8_t)(randn(&this->random_, 1) + x + 128.0)) << shift; + + // Block 5 (flat): gaussian random with standard deviation 2. + this->data_[y * stride + x + 5 * kBlockSize] = + ((uint8_t)(randn(&this->random_, 2) + y + 128.0)) << shift; + + // Block 6 (not flat): too high of directional gradient. + const int strong_edge = x > kBlockSize / 2 ? 64 : 0; + this->data_[y * stride + x + 6 * kBlockSize] = + ((uint8_t)(randn(&this->random_, 1) + strong_edge + 128.0)) << shift; + + // Block 7 (not flat): too high gradient. + const int big_check = ((x >> 2) % 2 + (y >> 2) % 2) % 2 ? -16 : 16; + this->data_[y * stride + x + 7 * kBlockSize] = + ((uint8_t)(randn(&this->random_, 1) + big_check + 128.0)) << shift; + } + } + + EXPECT_EQ(4, aom_flat_block_finder_run(&flat_block_finder, + (uint8_t *)&this->data_[0], w, h, + stride, &flat_blocks[0])); + + // First two blocks are not flat + EXPECT_EQ(0, flat_blocks[0]); + EXPECT_EQ(0, flat_blocks[1]); + + // Next 4 blocks are flat. + EXPECT_EQ(255, flat_blocks[2]); + EXPECT_EQ(255, flat_blocks[3]); + EXPECT_EQ(255, flat_blocks[4]); + EXPECT_EQ(255, flat_blocks[5]); + + // Last 2 are not flat by threshold + EXPECT_EQ(0, flat_blocks[6]); + EXPECT_EQ(0, flat_blocks[7]); + + // Add the noise from non-flat block 1 to every block. 
+ for (int y = 0; y < kBlockSize; ++y) { + for (int x = 0; x < kBlockSize * num_blocks_w; ++x) { + this->data_[y * stride + x] += + (this->data_[y * stride + x % kBlockSize + kBlockSize] - + (128 << shift)); + } + } + // Now the scored selection will pick the one that is most likely flat (block + // 0) + EXPECT_EQ(1, aom_flat_block_finder_run(&flat_block_finder, + (uint8_t *)&this->data_[0], w, h, + stride, &flat_blocks[0])); + EXPECT_EQ(1, flat_blocks[0]); + EXPECT_EQ(0, flat_blocks[1]); + EXPECT_EQ(0, flat_blocks[2]); + EXPECT_EQ(0, flat_blocks[3]); + EXPECT_EQ(0, flat_blocks[4]); + EXPECT_EQ(0, flat_blocks[5]); + EXPECT_EQ(0, flat_blocks[6]); + EXPECT_EQ(0, flat_blocks[7]); + + aom_flat_block_finder_free(&flat_block_finder); +} + +REGISTER_TYPED_TEST_SUITE_P(FlatBlockEstimatorTest, ExtractBlock, + FindFlatBlocks); + +typedef ::testing::Types<BitDepthParams<uint8_t, 8, false>, // lowbd + BitDepthParams<uint16_t, 8, true>, // lowbd in 16-bit + BitDepthParams<uint16_t, 10, true>, // highbd data + BitDepthParams<uint16_t, 12, true> > + AllBitDepthParams; +INSTANTIATE_TYPED_TEST_SUITE_P(FlatBlockInstatiation, FlatBlockEstimatorTest, + AllBitDepthParams); + +template <typename T> +class NoiseModelUpdateTest : public ::testing::Test, public T { + public: + static const int kWidth = 128; + static const int kHeight = 128; + static const int kBlockSize = 16; + static const int kNumBlocksX = kWidth / kBlockSize; + static const int kNumBlocksY = kHeight / kBlockSize; + + void SetUp() override { + const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3, + T::kBitDepth, T::kUseHighBD }; + ASSERT_TRUE(aom_noise_model_init(&model_, params)); + + random_.Reset(100171); + + data_.resize(kWidth * kHeight * 3); + denoised_.resize(kWidth * kHeight * 3); + noise_.resize(kWidth * kHeight * 3); + renoise_.resize(kWidth * kHeight); + flat_blocks_.resize(kNumBlocksX * kNumBlocksY); + + for (int c = 0, offset = 0; c < 3; ++c, offset += kWidth * kHeight) { + data_ptr_[c] = 
&data_[offset]; + noise_ptr_[c] = &noise_[offset]; + denoised_ptr_[c] = &denoised_[offset]; + strides_[c] = kWidth; + + data_ptr_raw_[c] = (uint8_t *)&data_[offset]; + denoised_ptr_raw_[c] = (uint8_t *)&denoised_[offset]; + } + chroma_sub_[0] = 0; + chroma_sub_[1] = 0; + } + + int NoiseModelUpdate(int block_size = kBlockSize) { + return aom_noise_model_update(&model_, data_ptr_raw_, denoised_ptr_raw_, + kWidth, kHeight, strides_, chroma_sub_, + &flat_blocks_[0], block_size); + } + + void TearDown() override { aom_noise_model_free(&model_); } + + protected: + aom_noise_model_t model_; + std::vector<typename T::data_type_t> data_; + std::vector<typename T::data_type_t> denoised_; + + std::vector<double> noise_; + std::vector<double> renoise_; + std::vector<uint8_t> flat_blocks_; + + typename T::data_type_t *data_ptr_[3]; + typename T::data_type_t *denoised_ptr_[3]; + + double *noise_ptr_[3]; + int strides_[3]; + int chroma_sub_[2]; + libaom_test::ACMRandom random_; + + private: + uint8_t *data_ptr_raw_[3]; + uint8_t *denoised_ptr_raw_[3]; +}; + +TYPED_TEST_SUITE_P(NoiseModelUpdateTest); + +TYPED_TEST_P(NoiseModelUpdateTest, UpdateFailsNoFlatBlocks) { + EXPECT_EQ(AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS, + this->NoiseModelUpdate()); +} + +TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForZeroNoiseAllFlat) { + this->flat_blocks_.assign(this->flat_blocks_.size(), 1); + this->denoised_.assign(this->denoised_.size(), 128); + this->data_.assign(this->denoised_.size(), 128); + EXPECT_EQ(AOM_NOISE_STATUS_INTERNAL_ERROR, this->NoiseModelUpdate()); +} + +TYPED_TEST_P(NoiseModelUpdateTest, UpdateFailsBlockSizeTooSmall) { + this->flat_blocks_.assign(this->flat_blocks_.size(), 1); + this->denoised_.assign(this->denoised_.size(), 128); + this->data_.assign(this->denoised_.size(), 128); + EXPECT_EQ(AOM_NOISE_STATUS_INVALID_ARGUMENT, + this->NoiseModelUpdate(6 /* block_size=6 is too small*/)); +} + +TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForWhiteRandomNoise) { + 
aom_noise_model_t &model = this->model_; + const int width = this->kWidth; + const int height = this->kHeight; + + const int shift = this->kBitDepth - 8; + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + this->data_ptr_[0][y * width + x] = int(64 + y + randn(&this->random_, 1)) + << shift; + this->denoised_ptr_[0][y * width + x] = (64 + y) << shift; + // Make the chroma planes completely correlated with the Y plane + for (int c = 1; c < 3; ++c) { + this->data_ptr_[c][y * width + x] = this->data_ptr_[0][y * width + x]; + this->denoised_ptr_[c][y * width + x] = + this->denoised_ptr_[0][y * width + x]; + } + } + } + this->flat_blocks_.assign(this->flat_blocks_.size(), 1); + EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate()); + + const double kCoeffEps = 0.075; + const int n = model.n; + for (int c = 0; c < 3; ++c) { + for (int i = 0; i < n; ++i) { + EXPECT_NEAR(0, model.latest_state[c].eqns.x[i], kCoeffEps); + EXPECT_NEAR(0, model.combined_state[c].eqns.x[i], kCoeffEps); + } + // The second and third channels are highly correlated with the first. + if (c > 0) { + ASSERT_EQ(n + 1, model.latest_state[c].eqns.n); + ASSERT_EQ(n + 1, model.combined_state[c].eqns.n); + + EXPECT_NEAR(1, model.latest_state[c].eqns.x[n], kCoeffEps); + EXPECT_NEAR(1, model.combined_state[c].eqns.x[n], kCoeffEps); + } + } + + // The fitted noise strength should be close to the standard deviation + // for all intensity bins. 
+ const double kStdEps = 0.1; + const double normalize = 1 << shift; + + for (int i = 0; i < model.latest_state[0].strength_solver.eqns.n; ++i) { + EXPECT_NEAR(1.0, + model.latest_state[0].strength_solver.eqns.x[i] / normalize, + kStdEps); + EXPECT_NEAR(1.0, + model.combined_state[0].strength_solver.eqns.x[i] / normalize, + kStdEps); + } + + aom_noise_strength_lut_t lut; + aom_noise_strength_solver_fit_piecewise( + &model.latest_state[0].strength_solver, -1, &lut); + ASSERT_EQ(2, lut.num_points); + EXPECT_NEAR(0.0, lut.points[0][0], 1e-5); + EXPECT_NEAR(1.0, lut.points[0][1] / normalize, kStdEps); + EXPECT_NEAR((1 << this->kBitDepth) - 1, lut.points[1][0], 1e-5); + EXPECT_NEAR(1.0, lut.points[1][1] / normalize, kStdEps); + aom_noise_strength_lut_free(&lut); +} + +TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForScaledWhiteNoise) { + aom_noise_model_t &model = this->model_; + const int width = this->kWidth; + const int height = this->kHeight; + + const double kCoeffEps = 0.055; + const double kLowStd = 1; + const double kHighStd = 4; + const int shift = this->kBitDepth - 8; + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < 3; ++c) { + // The image data is bimodal: + // Bottom half has low intensity and low noise strength + // Top half has high intensity and high noise strength + const int avg = (y < height / 2) ? 4 : 245; + const double std = (y < height / 2) ? kLowStd : kHighStd; + this->data_ptr_[c][y * width + x] = + ((uint8_t)std::min((int)255, + (int)(2 + avg + randn(&this->random_, std)))) + << shift; + this->denoised_ptr_[c][y * width + x] = (2 + avg) << shift; + } + } + } + // Label all blocks as flat for the update + this->flat_blocks_.assign(this->flat_blocks_.size(), 1); + EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate()); + + const int n = model.n; + // The noise is uncorrelated spatially and with the y channel. + // All coefficients should be reasonably close to zero. 
+ for (int c = 0; c < 3; ++c) { + for (int i = 0; i < n; ++i) { + EXPECT_NEAR(0, model.latest_state[c].eqns.x[i], kCoeffEps); + EXPECT_NEAR(0, model.combined_state[c].eqns.x[i], kCoeffEps); + } + if (c > 0) { + ASSERT_EQ(n + 1, model.latest_state[c].eqns.n); + ASSERT_EQ(n + 1, model.combined_state[c].eqns.n); + + // The correlation to the y channel should be low (near zero) + EXPECT_NEAR(0, model.latest_state[c].eqns.x[n], kCoeffEps); + EXPECT_NEAR(0, model.combined_state[c].eqns.x[n], kCoeffEps); + } + } + + // Noise strength should vary between kLowStd and kHighStd. + const double kStdEps = 0.15; + // We have to normalize fitted standard deviation based on bit depth. + const double normalize = (1 << shift); + + ASSERT_EQ(20, model.latest_state[0].strength_solver.eqns.n); + for (int i = 0; i < model.latest_state[0].strength_solver.eqns.n; ++i) { + const double a = i / 19.0; + const double expected = (kLowStd * (1.0 - a) + kHighStd * a); + EXPECT_NEAR(expected, + model.latest_state[0].strength_solver.eqns.x[i] / normalize, + kStdEps); + EXPECT_NEAR(expected, + model.combined_state[0].strength_solver.eqns.x[i] / normalize, + kStdEps); + } + + // If we fit a piecewise linear model, there should be two points: + // one near kLowStd at 0, and the other near kHighStd and 255. 
+ aom_noise_strength_lut_t lut; + aom_noise_strength_solver_fit_piecewise( + &model.latest_state[0].strength_solver, 2, &lut); + ASSERT_EQ(2, lut.num_points); + EXPECT_NEAR(0, lut.points[0][0], 1e-4); + EXPECT_NEAR(kLowStd, lut.points[0][1] / normalize, kStdEps); + EXPECT_NEAR((1 << this->kBitDepth) - 1, lut.points[1][0], 1e-5); + EXPECT_NEAR(kHighStd, lut.points[1][1] / normalize, kStdEps); + aom_noise_strength_lut_free(&lut); +} + +TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForCorrelatedNoise) { + aom_noise_model_t &model = this->model_; + const int width = this->kWidth; + const int height = this->kHeight; + const int kNumCoeffs = 24; + const double kStd = 4; + const double kStdEps = 0.3; + const double kCoeffEps = 0.065; + // Use different coefficients for each channel + const double kCoeffs[3][24] = { + { 0.02884, -0.03356, 0.00633, 0.01757, 0.02849, -0.04620, + 0.02833, -0.07178, 0.07076, -0.11603, -0.10413, -0.16571, + 0.05158, -0.07969, 0.02640, -0.07191, 0.02530, 0.41968, + 0.21450, -0.00702, -0.01401, -0.03676, -0.08713, 0.44196 }, + { 0.00269, -0.01291, -0.01513, 0.07234, 0.03208, 0.00477, + 0.00226, -0.00254, 0.03533, 0.12841, -0.25970, -0.06336, + 0.05238, -0.00845, -0.03118, 0.09043, -0.36558, 0.48903, + 0.00595, -0.11938, 0.02106, 0.095956, -0.350139, 0.59305 }, + { -0.00643, -0.01080, -0.01466, 0.06951, 0.03707, -0.00482, + 0.00817, -0.00909, 0.02949, 0.12181, -0.25210, -0.07886, + 0.06083, -0.01210, -0.03108, 0.08944, -0.35875, 0.49150, + 0.00415, -0.12905, 0.02870, 0.09740, -0.34610, 0.58824 }, + }; + + ASSERT_EQ(model.n, kNumCoeffs); + this->chroma_sub_[0] = this->chroma_sub_[1] = 1; + + this->flat_blocks_.assign(this->flat_blocks_.size(), 1); + + // Add different noise onto each plane + const int shift = this->kBitDepth - 8; + for (int c = 0; c < 3; ++c) { + noise_synth(&this->random_, model.params.lag, model.n, model.coords, + kCoeffs[c], this->noise_ptr_[c], width, height); + const int x_shift = c > 0 ? 
this->chroma_sub_[0] : 0; + const int y_shift = c > 0 ? this->chroma_sub_[1] : 0; + for (int y = 0; y < (height >> y_shift); ++y) { + for (int x = 0; x < (width >> x_shift); ++x) { + const uint8_t value = 64 + x / 2 + y / 4; + this->data_ptr_[c][y * width + x] = + (uint8_t(value + this->noise_ptr_[c][y * width + x] * kStd)) + << shift; + this->denoised_ptr_[c][y * width + x] = value << shift; + } + } + } + EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate()); + + // For the Y plane, the solved coefficients should be close to the original + const int n = model.n; + for (int c = 0; c < 3; ++c) { + for (int i = 0; i < n; ++i) { + EXPECT_NEAR(kCoeffs[c][i], model.latest_state[c].eqns.x[i], kCoeffEps); + EXPECT_NEAR(kCoeffs[c][i], model.combined_state[c].eqns.x[i], kCoeffEps); + } + // The chroma planes should be uncorrelated with the luma plane + if (c > 0) { + EXPECT_NEAR(0, model.latest_state[c].eqns.x[n], kCoeffEps); + EXPECT_NEAR(0, model.combined_state[c].eqns.x[n], kCoeffEps); + } + // Correlation between the coefficient vector and the fitted coefficients + // should be close to 1. 
+ EXPECT_LT(0.98, aom_normalized_cross_correlation( + model.latest_state[c].eqns.x, kCoeffs[c], kNumCoeffs)); + + noise_synth(&this->random_, model.params.lag, model.n, model.coords, + model.latest_state[c].eqns.x, &this->renoise_[0], width, + height); + + EXPECT_TRUE(aom_noise_data_validate(&this->renoise_[0], width, height)); + } + + // Check fitted noise strength + const double normalize = 1 << shift; + for (int c = 0; c < 3; ++c) { + for (int i = 0; i < model.latest_state[c].strength_solver.eqns.n; ++i) { + EXPECT_NEAR(kStd, + model.latest_state[c].strength_solver.eqns.x[i] / normalize, + kStdEps); + } + } +} + +TYPED_TEST_P(NoiseModelUpdateTest, + NoiseStrengthChangeSignalsDifferentNoiseType) { + aom_noise_model_t &model = this->model_; + const int width = this->kWidth; + const int height = this->kHeight; + const int block_size = this->kBlockSize; + // Create a gradient image with std = 2 uncorrelated noise + const double kStd = 2; + const int shift = this->kBitDepth - 8; + + for (int i = 0; i < width * height; ++i) { + const uint8_t val = (i % width) < width / 2 ? 
64 : 192; + for (int c = 0; c < 3; ++c) { + this->noise_ptr_[c][i] = randn(&this->random_, 1); + this->data_ptr_[c][i] = ((uint8_t)(this->noise_ptr_[c][i] * kStd + val)) + << shift; + this->denoised_ptr_[c][i] = val << shift; + } + } + this->flat_blocks_.assign(this->flat_blocks_.size(), 1); + EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate()); + + const int kNumBlocks = width * height / block_size / block_size; + EXPECT_EQ(kNumBlocks, model.latest_state[0].strength_solver.num_equations); + EXPECT_EQ(kNumBlocks, model.latest_state[1].strength_solver.num_equations); + EXPECT_EQ(kNumBlocks, model.latest_state[2].strength_solver.num_equations); + EXPECT_EQ(kNumBlocks, model.combined_state[0].strength_solver.num_equations); + EXPECT_EQ(kNumBlocks, model.combined_state[1].strength_solver.num_equations); + EXPECT_EQ(kNumBlocks, model.combined_state[2].strength_solver.num_equations); + + // Bump up noise by an insignificant amount + for (int i = 0; i < width * height; ++i) { + const uint8_t val = (i % width) < width / 2 ? 64 : 192; + this->data_ptr_[0][i] = + ((uint8_t)(this->noise_ptr_[0][i] * (kStd + 0.085) + val)) << shift; + } + EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate()); + + const double kARGainTolerance = 0.02; + for (int c = 0; c < 3; ++c) { + EXPECT_EQ(kNumBlocks, model.latest_state[c].strength_solver.num_equations); + EXPECT_EQ(15250, model.latest_state[c].num_observations); + EXPECT_NEAR(1, model.latest_state[c].ar_gain, kARGainTolerance); + + EXPECT_EQ(2 * kNumBlocks, + model.combined_state[c].strength_solver.num_equations); + EXPECT_EQ(2 * 15250, model.combined_state[c].num_observations); + EXPECT_NEAR(1, model.combined_state[c].ar_gain, kARGainTolerance); + } + + // Bump up the noise strength on half the image for one channel by a + // significant amount. + for (int i = 0; i < width * height; ++i) { + const uint8_t val = (i % width) < width / 2 ? 
64 : 128; + if (i % width < width / 2) { + this->data_ptr_[0][i] = + ((uint8_t)(randn(&this->random_, kStd + 0.5) + val)) << shift; + } + } + EXPECT_EQ(AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE, this->NoiseModelUpdate()); + + // Since we didn't update the combined state, it should still be at 2 * + // num_blocks + EXPECT_EQ(kNumBlocks, model.latest_state[0].strength_solver.num_equations); + EXPECT_EQ(2 * kNumBlocks, + model.combined_state[0].strength_solver.num_equations); + + // In normal operation, the "latest" estimate can be saved to the "combined" + // state for continued updates. + aom_noise_model_save_latest(&model); + for (int c = 0; c < 3; ++c) { + EXPECT_EQ(kNumBlocks, model.latest_state[c].strength_solver.num_equations); + EXPECT_EQ(15250, model.latest_state[c].num_observations); + EXPECT_NEAR(1, model.latest_state[c].ar_gain, kARGainTolerance); + + EXPECT_EQ(kNumBlocks, + model.combined_state[c].strength_solver.num_equations); + EXPECT_EQ(15250, model.combined_state[c].num_observations); + EXPECT_NEAR(1, model.combined_state[c].ar_gain, kARGainTolerance); + } +} + +TYPED_TEST_P(NoiseModelUpdateTest, NoiseCoeffsSignalsDifferentNoiseType) { + aom_noise_model_t &model = this->model_; + const int width = this->kWidth; + const int height = this->kHeight; + const double kCoeffs[2][24] = { + { 0.02884, -0.03356, 0.00633, 0.01757, 0.02849, -0.04620, + 0.02833, -0.07178, 0.07076, -0.11603, -0.10413, -0.16571, + 0.05158, -0.07969, 0.02640, -0.07191, 0.02530, 0.41968, + 0.21450, -0.00702, -0.01401, -0.03676, -0.08713, 0.44196 }, + { 0.00269, -0.01291, -0.01513, 0.07234, 0.03208, 0.00477, + 0.00226, -0.00254, 0.03533, 0.12841, -0.25970, -0.06336, + 0.05238, -0.00845, -0.03118, 0.09043, -0.36558, 0.48903, + 0.00595, -0.11938, 0.02106, 0.095956, -0.350139, 0.59305 } + }; + + noise_synth(&this->random_, model.params.lag, model.n, model.coords, + kCoeffs[0], this->noise_ptr_[0], width, height); + for (int i = 0; i < width * height; ++i) { + this->data_ptr_[0][i] = 
(uint8_t)(128 + this->noise_ptr_[0][i]); + } + this->flat_blocks_.assign(this->flat_blocks_.size(), 1); + EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate()); + + // Now try with the second set of AR coefficients + noise_synth(&this->random_, model.params.lag, model.n, model.coords, + kCoeffs[1], this->noise_ptr_[0], width, height); + for (int i = 0; i < width * height; ++i) { + this->data_ptr_[0][i] = (uint8_t)(128 + this->noise_ptr_[0][i]); + } + EXPECT_EQ(AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE, this->NoiseModelUpdate()); +} +REGISTER_TYPED_TEST_SUITE_P(NoiseModelUpdateTest, UpdateFailsNoFlatBlocks, + UpdateSuccessForZeroNoiseAllFlat, + UpdateFailsBlockSizeTooSmall, + UpdateSuccessForWhiteRandomNoise, + UpdateSuccessForScaledWhiteNoise, + UpdateSuccessForCorrelatedNoise, + NoiseStrengthChangeSignalsDifferentNoiseType, + NoiseCoeffsSignalsDifferentNoiseType); + +INSTANTIATE_TYPED_TEST_SUITE_P(NoiseModelUpdateTestInstatiation, + NoiseModelUpdateTest, AllBitDepthParams); + +TEST(NoiseModelGetGrainParameters, TestLagSize) { + aom_film_grain_t film_grain; + for (int lag = 1; lag <= 3; ++lag) { + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 }; + aom_noise_model_t model; + EXPECT_TRUE(aom_noise_model_init(&model, params)); + EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain)); + EXPECT_EQ(lag, film_grain.ar_coeff_lag); + aom_noise_model_free(&model); + } + + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 4, 8, 0 }; + aom_noise_model_t model; + EXPECT_TRUE(aom_noise_model_init(&model, params)); + EXPECT_FALSE(aom_noise_model_get_grain_parameters(&model, &film_grain)); + aom_noise_model_free(&model); +} + +TEST(NoiseModelGetGrainParameters, TestARCoeffShiftBounds) { + struct TestCase { + double max_input_value; + int expected_ar_coeff_shift; + int expected_value; + }; + const int lag = 1; + const int kNumTestCases = 19; + const TestCase test_cases[] = { + // Test cases for ar_coeff_shift = 9 + { 0, 9, 0 }, + { 
0.125, 9, 64 }, + { -0.125, 9, -64 }, + { 0.2499, 9, 127 }, + { -0.25, 9, -128 }, + // Test cases for ar_coeff_shift = 8 + { 0.25, 8, 64 }, + { -0.2501, 8, -64 }, + { 0.499, 8, 127 }, + { -0.5, 8, -128 }, + // Test cases for ar_coeff_shift = 7 + { 0.5, 7, 64 }, + { -0.5001, 7, -64 }, + { 0.999, 7, 127 }, + { -1, 7, -128 }, + // Test cases for ar_coeff_shift = 6 + { 1.0, 6, 64 }, + { -1.0001, 6, -64 }, + { 2.0, 6, 127 }, + { -2.0, 6, -128 }, + { 4, 6, 127 }, + { -4, 6, -128 }, + }; + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 }; + aom_noise_model_t model; + EXPECT_TRUE(aom_noise_model_init(&model, params)); + + for (int i = 0; i < kNumTestCases; ++i) { + const TestCase &test_case = test_cases[i]; + model.combined_state[0].eqns.x[0] = test_case.max_input_value; + + aom_film_grain_t film_grain; + EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain)); + EXPECT_EQ(1, film_grain.ar_coeff_lag); + EXPECT_EQ(test_case.expected_ar_coeff_shift, film_grain.ar_coeff_shift); + EXPECT_EQ(test_case.expected_value, film_grain.ar_coeffs_y[0]); + } + aom_noise_model_free(&model); +} + +TEST(NoiseModelGetGrainParameters, TestNoiseStrengthShiftBounds) { + struct TestCase { + double max_input_value; + int expected_scaling_shift; + int expected_value; + }; + const int kNumTestCases = 10; + const TestCase test_cases[] = { + { 0, 11, 0 }, { 1, 11, 64 }, { 2, 11, 128 }, { 3.99, 11, 255 }, + { 4, 10, 128 }, { 7.99, 10, 255 }, { 8, 9, 128 }, { 16, 8, 128 }, + { 31.99, 8, 255 }, { 64, 8, 255 }, // clipped + }; + const int lag = 1; + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 }; + aom_noise_model_t model; + EXPECT_TRUE(aom_noise_model_init(&model, params)); + + for (int i = 0; i < kNumTestCases; ++i) { + const TestCase &test_case = test_cases[i]; + aom_equation_system_t &eqns = model.combined_state[0].strength_solver.eqns; + // Set the fitted scale parameters to be a constant value. 
+ for (int j = 0; j < eqns.n; ++j) { + eqns.x[j] = test_case.max_input_value; + } + aom_film_grain_t film_grain; + EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain)); + // We expect a single constant segment + EXPECT_EQ(test_case.expected_scaling_shift, film_grain.scaling_shift); + EXPECT_EQ(test_case.expected_value, film_grain.scaling_points_y[0][1]); + EXPECT_EQ(test_case.expected_value, film_grain.scaling_points_y[1][1]); + } + aom_noise_model_free(&model); +} + +// The AR coefficients are the same inputs used to generate "Test 2" in the test +// vectors +TEST(NoiseModelGetGrainParameters, GetGrainParametersReal) { + const double kInputCoeffsY[] = { 0.0315, 0.0073, 0.0218, 0.00235, 0.00511, + -0.0222, 0.0627, -0.022, 0.05575, -0.1816, + 0.0107, -0.1966, 0.00065, -0.0809, 0.04934, + -0.1349, -0.0352, 0.41772, 0.27973, 0.04207, + -0.0429, -0.1372, 0.06193, 0.52032 }; + const double kInputCoeffsCB[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5 }; + const double kInputCoeffsCR[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.5 }; + const int kExpectedARCoeffsY[] = { 4, 1, 3, 0, 1, -3, 8, -3, + 7, -23, 1, -25, 0, -10, 6, -17, + -5, 53, 36, 5, -5, -18, 8, 67 }; + const int kExpectedARCoeffsCB[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84 }; + const int kExpectedARCoeffsCR[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -126 }; + // Scaling function is initialized analytically with a sqrt function. 
+ const int kNumScalingPointsY = 12; + const int kExpectedScalingPointsY[][2] = { + { 0, 0 }, { 13, 44 }, { 27, 62 }, { 40, 76 }, + { 54, 88 }, { 67, 98 }, { 94, 117 }, { 121, 132 }, + { 148, 146 }, { 174, 159 }, { 201, 171 }, { 255, 192 }, + }; + + const int lag = 3; + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 }; + aom_noise_model_t model; + EXPECT_TRUE(aom_noise_model_init(&model, params)); + + // Setup the AR coeffs + memcpy(model.combined_state[0].eqns.x, kInputCoeffsY, sizeof(kInputCoeffsY)); + memcpy(model.combined_state[1].eqns.x, kInputCoeffsCB, + sizeof(kInputCoeffsCB)); + memcpy(model.combined_state[2].eqns.x, kInputCoeffsCR, + sizeof(kInputCoeffsCR)); + for (int i = 0; i < model.combined_state[0].strength_solver.num_bins; ++i) { + const double x = + ((double)i) / (model.combined_state[0].strength_solver.num_bins - 1.0); + model.combined_state[0].strength_solver.eqns.x[i] = 6 * sqrt(x); + model.combined_state[1].strength_solver.eqns.x[i] = 3; + model.combined_state[2].strength_solver.eqns.x[i] = 2; + + // Inject some observations into the strength solver, as during film grain + // parameter extraction an estimate of the average strength will be used to + // adjust correlation. 
+ const int n = model.combined_state[0].strength_solver.num_bins; + for (int j = 0; j < model.combined_state[0].strength_solver.num_bins; ++j) { + model.combined_state[0].strength_solver.eqns.A[i * n + j] = 1; + model.combined_state[1].strength_solver.eqns.A[i * n + j] = 1; + model.combined_state[2].strength_solver.eqns.A[i * n + j] = 1; + } + } + + aom_film_grain_t film_grain; + EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain)); + EXPECT_EQ(lag, film_grain.ar_coeff_lag); + EXPECT_EQ(3, film_grain.ar_coeff_lag); + EXPECT_EQ(7, film_grain.ar_coeff_shift); + EXPECT_EQ(10, film_grain.scaling_shift); + EXPECT_EQ(kNumScalingPointsY, film_grain.num_y_points); + EXPECT_EQ(1, film_grain.update_parameters); + EXPECT_EQ(1, film_grain.apply_grain); + + const int kNumARCoeffs = 24; + for (int i = 0; i < kNumARCoeffs; ++i) { + EXPECT_EQ(kExpectedARCoeffsY[i], film_grain.ar_coeffs_y[i]); + } + for (int i = 0; i < kNumARCoeffs + 1; ++i) { + EXPECT_EQ(kExpectedARCoeffsCB[i], film_grain.ar_coeffs_cb[i]); + } + for (int i = 0; i < kNumARCoeffs + 1; ++i) { + EXPECT_EQ(kExpectedARCoeffsCR[i], film_grain.ar_coeffs_cr[i]); + } + for (int i = 0; i < kNumScalingPointsY; ++i) { + EXPECT_EQ(kExpectedScalingPointsY[i][0], film_grain.scaling_points_y[i][0]); + EXPECT_EQ(kExpectedScalingPointsY[i][1], film_grain.scaling_points_y[i][1]); + } + + // CB strength should just be a piecewise segment + EXPECT_EQ(2, film_grain.num_cb_points); + EXPECT_EQ(0, film_grain.scaling_points_cb[0][0]); + EXPECT_EQ(255, film_grain.scaling_points_cb[1][0]); + EXPECT_EQ(96, film_grain.scaling_points_cb[0][1]); + EXPECT_EQ(96, film_grain.scaling_points_cb[1][1]); + + // CR strength should just be a piecewise segment + EXPECT_EQ(2, film_grain.num_cr_points); + EXPECT_EQ(0, film_grain.scaling_points_cr[0][0]); + EXPECT_EQ(255, film_grain.scaling_points_cr[1][0]); + EXPECT_EQ(64, film_grain.scaling_points_cr[0][1]); + EXPECT_EQ(64, film_grain.scaling_points_cr[1][1]); + + EXPECT_EQ(128, 
film_grain.cb_mult); + EXPECT_EQ(192, film_grain.cb_luma_mult); + EXPECT_EQ(256, film_grain.cb_offset); + EXPECT_EQ(128, film_grain.cr_mult); + EXPECT_EQ(192, film_grain.cr_luma_mult); + EXPECT_EQ(256, film_grain.cr_offset); + EXPECT_EQ(0, film_grain.chroma_scaling_from_luma); + EXPECT_EQ(0, film_grain.grain_scale_shift); + + aom_noise_model_free(&model); +} + +template <typename T> +class WienerDenoiseTest : public ::testing::Test, public T { + public: + static void SetUpTestSuite() { aom_dsp_rtcd(); } + + protected: + void SetUp() override { + static const float kNoiseLevel = 5.f; + static const float kStd = 4.0; + static const double kMaxValue = (1 << T::kBitDepth) - 1; + + chroma_sub_[0] = 1; + chroma_sub_[1] = 1; + stride_[0] = kWidth; + stride_[1] = kWidth / 2; + stride_[2] = kWidth / 2; + for (int k = 0; k < 3; ++k) { + data_[k].resize(kWidth * kHeight); + denoised_[k].resize(kWidth * kHeight); + noise_psd_[k].resize(kBlockSize * kBlockSize); + } + + const double kCoeffsY[] = { 0.0406, -0.116, -0.078, -0.152, 0.0033, -0.093, + 0.048, 0.404, 0.2353, -0.035, -0.093, 0.441 }; + const int kCoords[12][2] = { + { -2, -2 }, { -1, -2 }, { 0, -2 }, { 1, -2 }, { 2, -2 }, { -2, -1 }, + { -1, -1 }, { 0, -1 }, { 1, -1 }, { 2, -1 }, { -2, 0 }, { -1, 0 } + }; + const int kLag = 2; + const int kLength = 12; + libaom_test::ACMRandom random; + std::vector<double> noise(kWidth * kHeight); + noise_synth(&random, kLag, kLength, kCoords, kCoeffsY, &noise[0], kWidth, + kHeight); + noise_psd_[0] = get_noise_psd(&noise[0], kWidth, kHeight, kBlockSize); + for (int i = 0; i < kBlockSize * kBlockSize; ++i) { + noise_psd_[0][i] = (float)(noise_psd_[0][i] * kStd * kStd * kScaleNoise * + kScaleNoise / (kMaxValue * kMaxValue)); + } + + float psd_value = + aom_noise_psd_get_default_value(kBlockSizeChroma, kNoiseLevel); + for (int i = 0; i < kBlockSizeChroma * kBlockSizeChroma; ++i) { + noise_psd_[1][i] = psd_value; + noise_psd_[2][i] = psd_value; + } + for (int y = 0; y < kHeight; ++y) { + 
for (int x = 0; x < kWidth; ++x) { + data_[0][y * stride_[0] + x] = (typename T::data_type_t)fclamp( + (x + noise[y * stride_[0] + x] * kStd) * kScaleNoise, 0, kMaxValue); + } + } + + for (int c = 1; c < 3; ++c) { + for (int y = 0; y < (kHeight >> 1); ++y) { + for (int x = 0; x < (kWidth >> 1); ++x) { + data_[c][y * stride_[c] + x] = (typename T::data_type_t)fclamp( + (x + randn(&random, kStd)) * kScaleNoise, 0, kMaxValue); + } + } + } + for (int k = 0; k < 3; ++k) { + noise_psd_ptrs_[k] = &noise_psd_[k][0]; + } + } + static const int kBlockSize = 32; + static const int kBlockSizeChroma = 16; + static const int kWidth = 256; + static const int kHeight = 256; + static const int kScaleNoise = 1 << (T::kBitDepth - 8); + + std::vector<typename T::data_type_t> data_[3]; + std::vector<typename T::data_type_t> denoised_[3]; + std::vector<float> noise_psd_[3]; + int chroma_sub_[2]; + float *noise_psd_ptrs_[3]; + int stride_[3]; +}; + +TYPED_TEST_SUITE_P(WienerDenoiseTest); + +TYPED_TEST_P(WienerDenoiseTest, InvalidBlockSize) { + const uint8_t *const data_ptrs[3] = { + reinterpret_cast<uint8_t *>(&this->data_[0][0]), + reinterpret_cast<uint8_t *>(&this->data_[1][0]), + reinterpret_cast<uint8_t *>(&this->data_[2][0]), + }; + uint8_t *denoised_ptrs[3] = { + reinterpret_cast<uint8_t *>(&this->denoised_[0][0]), + reinterpret_cast<uint8_t *>(&this->denoised_[1][0]), + reinterpret_cast<uint8_t *>(&this->denoised_[2][0]), + }; + EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth, + this->kHeight, this->stride_, + this->chroma_sub_, this->noise_psd_ptrs_, + 18, this->kBitDepth, this->kUseHighBD)); + EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth, + this->kHeight, this->stride_, + this->chroma_sub_, this->noise_psd_ptrs_, + 48, this->kBitDepth, this->kUseHighBD)); + EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth, + this->kHeight, this->stride_, + this->chroma_sub_, this->noise_psd_ptrs_, + 64, 
this->kBitDepth, this->kUseHighBD)); +} + +TYPED_TEST_P(WienerDenoiseTest, InvalidChromaSubsampling) { + const uint8_t *const data_ptrs[3] = { + reinterpret_cast<uint8_t *>(&this->data_[0][0]), + reinterpret_cast<uint8_t *>(&this->data_[1][0]), + reinterpret_cast<uint8_t *>(&this->data_[2][0]), + }; + uint8_t *denoised_ptrs[3] = { + reinterpret_cast<uint8_t *>(&this->denoised_[0][0]), + reinterpret_cast<uint8_t *>(&this->denoised_[1][0]), + reinterpret_cast<uint8_t *>(&this->denoised_[2][0]), + }; + int chroma_sub[2] = { 1, 0 }; + EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth, + this->kHeight, this->stride_, chroma_sub, + this->noise_psd_ptrs_, 32, this->kBitDepth, + this->kUseHighBD)); + + chroma_sub[0] = 0; + chroma_sub[1] = 1; + EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth, + this->kHeight, this->stride_, chroma_sub, + this->noise_psd_ptrs_, 32, this->kBitDepth, + this->kUseHighBD)); +} + +TYPED_TEST_P(WienerDenoiseTest, GradientTest) { + const int width = this->kWidth; + const int height = this->kHeight; + const int block_size = this->kBlockSize; + const uint8_t *const data_ptrs[3] = { + reinterpret_cast<uint8_t *>(&this->data_[0][0]), + reinterpret_cast<uint8_t *>(&this->data_[1][0]), + reinterpret_cast<uint8_t *>(&this->data_[2][0]), + }; + uint8_t *denoised_ptrs[3] = { + reinterpret_cast<uint8_t *>(&this->denoised_[0][0]), + reinterpret_cast<uint8_t *>(&this->denoised_[1][0]), + reinterpret_cast<uint8_t *>(&this->denoised_[2][0]), + }; + const int ret = aom_wiener_denoise_2d( + data_ptrs, denoised_ptrs, width, height, this->stride_, this->chroma_sub_, + this->noise_psd_ptrs_, block_size, this->kBitDepth, this->kUseHighBD); + EXPECT_EQ(1, ret); + + // Check the noise on the denoised image (from the analytical gradient) + // and make sure that it is less than what we added. 
+ for (int c = 0; c < 3; ++c) { + std::vector<double> measured_noise(width * height); + + double var = 0; + const int shift = (c > 0); + for (int x = 0; x < (width >> shift); ++x) { + for (int y = 0; y < (height >> shift); ++y) { + const double diff = this->denoised_[c][y * this->stride_[c] + x] - + x * this->kScaleNoise; + var += diff * diff; + measured_noise[y * width + x] = diff; + } + } + var /= (width * height); + const double std = sqrt(std::max(0.0, var)); + EXPECT_LE(std, 1.25f * this->kScaleNoise); + if (c == 0) { + std::vector<float> measured_psd = + get_noise_psd(&measured_noise[0], width, height, block_size); + std::vector<double> measured_psd_d(block_size * block_size); + std::vector<double> noise_psd_d(block_size * block_size); + std::copy(measured_psd.begin(), measured_psd.end(), + measured_psd_d.begin()); + std::copy(this->noise_psd_[0].begin(), this->noise_psd_[0].end(), + noise_psd_d.begin()); + EXPECT_LT( + aom_normalized_cross_correlation(&measured_psd_d[0], &noise_psd_d[0], + (int)(noise_psd_d.size())), + 0.35); + } + } +} + +REGISTER_TYPED_TEST_SUITE_P(WienerDenoiseTest, InvalidBlockSize, + InvalidChromaSubsampling, GradientTest); + +INSTANTIATE_TYPED_TEST_SUITE_P(WienerDenoiseTestInstatiation, WienerDenoiseTest, + AllBitDepthParams); diff --git a/third_party/aom/test/obmc_sad_test.cc b/third_party/aom/test/obmc_sad_test.cc new file mode 100644 index 0000000000..967b677666 --- /dev/null +++ b/third_party/aom/test/obmc_sad_test.cc @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/function_equivalence_test.h" +#include "test/register_state_check.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom/aom_integer.h" + +#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE) + +using libaom_test::FunctionEquivalenceTest; + +namespace { + +static const int kIterations = 1000; +static const int kMaskMax = 64; + +typedef unsigned int (*ObmcSadF)(const uint8_t *pre, int pre_stride, + const int32_t *wsrc, const int32_t *mask); +typedef libaom_test::FuncParam<ObmcSadF> TestFuncs; + +//////////////////////////////////////////////////////////////////////////////// +// 8 bit +//////////////////////////////////////////////////////////////////////////////// + +class ObmcSadTest : public FunctionEquivalenceTest<ObmcSadF> {}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ObmcSadTest); + +TEST_P(ObmcSadTest, RandomValues) { + DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + const int pre_stride = rng_(MAX_SB_SIZE + 1); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = rng_.Rand8(); + wsrc[i] = rng_.Rand8() * rng_(kMaskMax * kMaskMax + 1); + mask[i] = rng_(kMaskMax * kMaskMax + 1); + } + + const unsigned int ref_res = params_.ref_func(pre, pre_stride, wsrc, mask); + unsigned int tst_res; + API_REGISTER_STATE_CHECK(tst_res = + params_.tst_func(pre, pre_stride, wsrc, mask)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +TEST_P(ObmcSadTest, ExtremeValues) { + DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, 
int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) { + const int pre_stride = iter; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = UINT8_MAX; + wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax; + mask[i] = kMaskMax * kMaskMax; + } + + const unsigned int ref_res = params_.ref_func(pre, pre_stride, wsrc, mask); + unsigned int tst_res; + API_REGISTER_STATE_CHECK(tst_res = + params_.tst_func(pre, pre_stride, wsrc, mask)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +#if HAVE_SSE4_1 +const ObmcSadTest::ParamType sse4_functions[] = { + TestFuncs(aom_obmc_sad128x128_c, aom_obmc_sad128x128_sse4_1), + TestFuncs(aom_obmc_sad128x64_c, aom_obmc_sad128x64_sse4_1), + TestFuncs(aom_obmc_sad64x128_c, aom_obmc_sad64x128_sse4_1), + TestFuncs(aom_obmc_sad64x64_c, aom_obmc_sad64x64_sse4_1), + TestFuncs(aom_obmc_sad64x32_c, aom_obmc_sad64x32_sse4_1), + TestFuncs(aom_obmc_sad32x64_c, aom_obmc_sad32x64_sse4_1), + TestFuncs(aom_obmc_sad32x32_c, aom_obmc_sad32x32_sse4_1), + TestFuncs(aom_obmc_sad32x16_c, aom_obmc_sad32x16_sse4_1), + TestFuncs(aom_obmc_sad16x32_c, aom_obmc_sad16x32_sse4_1), + TestFuncs(aom_obmc_sad16x16_c, aom_obmc_sad16x16_sse4_1), + TestFuncs(aom_obmc_sad16x8_c, aom_obmc_sad16x8_sse4_1), + TestFuncs(aom_obmc_sad8x16_c, aom_obmc_sad8x16_sse4_1), + TestFuncs(aom_obmc_sad8x8_c, aom_obmc_sad8x8_sse4_1), + TestFuncs(aom_obmc_sad8x4_c, aom_obmc_sad8x4_sse4_1), + TestFuncs(aom_obmc_sad4x8_c, aom_obmc_sad4x8_sse4_1), + TestFuncs(aom_obmc_sad4x4_c, aom_obmc_sad4x4_sse4_1), + + TestFuncs(aom_obmc_sad64x16_c, aom_obmc_sad64x16_sse4_1), + TestFuncs(aom_obmc_sad16x64_c, aom_obmc_sad16x64_sse4_1), + TestFuncs(aom_obmc_sad32x8_c, aom_obmc_sad32x8_sse4_1), + TestFuncs(aom_obmc_sad8x32_c, aom_obmc_sad8x32_sse4_1), + TestFuncs(aom_obmc_sad16x4_c, aom_obmc_sad16x4_sse4_1), + TestFuncs(aom_obmc_sad4x16_c, aom_obmc_sad4x16_sse4_1), +}; + +INSTANTIATE_TEST_SUITE_P(SSE4_1, ObmcSadTest, + ::testing::ValuesIn(sse4_functions)); +#endif // 
HAVE_SSE4_1 + +#if HAVE_AVX2 +const ObmcSadTest::ParamType avx2_functions[] = { + TestFuncs(aom_obmc_sad128x128_c, aom_obmc_sad128x128_avx2), + TestFuncs(aom_obmc_sad128x64_c, aom_obmc_sad128x64_avx2), + TestFuncs(aom_obmc_sad64x128_c, aom_obmc_sad64x128_avx2), + TestFuncs(aom_obmc_sad64x64_c, aom_obmc_sad64x64_avx2), + TestFuncs(aom_obmc_sad64x32_c, aom_obmc_sad64x32_avx2), + TestFuncs(aom_obmc_sad32x64_c, aom_obmc_sad32x64_avx2), + TestFuncs(aom_obmc_sad32x32_c, aom_obmc_sad32x32_avx2), + TestFuncs(aom_obmc_sad32x16_c, aom_obmc_sad32x16_avx2), + TestFuncs(aom_obmc_sad16x32_c, aom_obmc_sad16x32_avx2), + TestFuncs(aom_obmc_sad16x16_c, aom_obmc_sad16x16_avx2), + TestFuncs(aom_obmc_sad16x8_c, aom_obmc_sad16x8_avx2), + TestFuncs(aom_obmc_sad8x16_c, aom_obmc_sad8x16_avx2), + TestFuncs(aom_obmc_sad8x8_c, aom_obmc_sad8x8_avx2), + TestFuncs(aom_obmc_sad8x4_c, aom_obmc_sad8x4_avx2), + TestFuncs(aom_obmc_sad4x8_c, aom_obmc_sad4x8_avx2), + TestFuncs(aom_obmc_sad4x4_c, aom_obmc_sad4x4_avx2), + + TestFuncs(aom_obmc_sad64x16_c, aom_obmc_sad64x16_avx2), + TestFuncs(aom_obmc_sad16x64_c, aom_obmc_sad16x64_avx2), + TestFuncs(aom_obmc_sad32x8_c, aom_obmc_sad32x8_avx2), + TestFuncs(aom_obmc_sad8x32_c, aom_obmc_sad8x32_avx2), + TestFuncs(aom_obmc_sad16x4_c, aom_obmc_sad16x4_avx2), + TestFuncs(aom_obmc_sad4x16_c, aom_obmc_sad4x16_avx2), +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, ObmcSadTest, + ::testing::ValuesIn(avx2_functions)); +#endif // HAVE_AVX2 + +#if HAVE_NEON +const ObmcSadTest::ParamType neon_functions[] = { + TestFuncs(aom_obmc_sad128x128_c, aom_obmc_sad128x128_neon), + TestFuncs(aom_obmc_sad128x64_c, aom_obmc_sad128x64_neon), + TestFuncs(aom_obmc_sad64x128_c, aom_obmc_sad64x128_neon), + TestFuncs(aom_obmc_sad64x64_c, aom_obmc_sad64x64_neon), + TestFuncs(aom_obmc_sad64x32_c, aom_obmc_sad64x32_neon), + TestFuncs(aom_obmc_sad32x64_c, aom_obmc_sad32x64_neon), + TestFuncs(aom_obmc_sad32x32_c, aom_obmc_sad32x32_neon), + TestFuncs(aom_obmc_sad32x16_c, aom_obmc_sad32x16_neon), + 
TestFuncs(aom_obmc_sad16x32_c, aom_obmc_sad16x32_neon), + TestFuncs(aom_obmc_sad16x16_c, aom_obmc_sad16x16_neon), + TestFuncs(aom_obmc_sad16x8_c, aom_obmc_sad16x8_neon), + TestFuncs(aom_obmc_sad8x16_c, aom_obmc_sad8x16_neon), + TestFuncs(aom_obmc_sad8x8_c, aom_obmc_sad8x8_neon), + TestFuncs(aom_obmc_sad8x4_c, aom_obmc_sad8x4_neon), + TestFuncs(aom_obmc_sad4x8_c, aom_obmc_sad4x8_neon), + TestFuncs(aom_obmc_sad4x4_c, aom_obmc_sad4x4_neon), + + TestFuncs(aom_obmc_sad64x16_c, aom_obmc_sad64x16_neon), + TestFuncs(aom_obmc_sad16x64_c, aom_obmc_sad16x64_neon), + TestFuncs(aom_obmc_sad32x8_c, aom_obmc_sad32x8_neon), + TestFuncs(aom_obmc_sad8x32_c, aom_obmc_sad8x32_neon), + TestFuncs(aom_obmc_sad16x4_c, aom_obmc_sad16x4_neon), + TestFuncs(aom_obmc_sad4x16_c, aom_obmc_sad4x16_neon), +}; + +INSTANTIATE_TEST_SUITE_P(NEON, ObmcSadTest, + ::testing::ValuesIn(neon_functions)); +#endif // HAVE_NEON + +#if CONFIG_AV1_HIGHBITDEPTH +//////////////////////////////////////////////////////////////////////////////// +// High bit-depth +//////////////////////////////////////////////////////////////////////////////// + +class ObmcSadHBDTest : public FunctionEquivalenceTest<ObmcSadF> {}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ObmcSadHBDTest); + +TEST_P(ObmcSadHBDTest, RandomValues) { + DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + const int pre_stride = rng_(MAX_SB_SIZE + 1); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = rng_(1 << 12); + wsrc[i] = rng_(1 << 12) * rng_(kMaskMax * kMaskMax + 1); + mask[i] = rng_(kMaskMax * kMaskMax + 1); + } + + const unsigned int ref_res = + params_.ref_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask); + unsigned int tst_res; + API_REGISTER_STATE_CHECK( + tst_res = + params_.tst_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask)); + + 
ASSERT_EQ(ref_res, tst_res); + } +} + +TEST_P(ObmcSadHBDTest, ExtremeValues) { + DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) { + const int pre_stride = iter; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = (1 << 12) - 1; + wsrc[i] = ((1 << 12) - 1) * kMaskMax * kMaskMax; + mask[i] = kMaskMax * kMaskMax; + } + + const unsigned int ref_res = + params_.ref_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask); + unsigned int tst_res; + API_REGISTER_STATE_CHECK( + tst_res = + params_.tst_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +#if HAVE_NEON +ObmcSadHBDTest::ParamType neon_functions_hbd[] = { + TestFuncs(aom_highbd_obmc_sad128x128_c, aom_highbd_obmc_sad128x128_neon), + TestFuncs(aom_highbd_obmc_sad128x64_c, aom_highbd_obmc_sad128x64_neon), + TestFuncs(aom_highbd_obmc_sad64x128_c, aom_highbd_obmc_sad64x128_neon), + TestFuncs(aom_highbd_obmc_sad64x64_c, aom_highbd_obmc_sad64x64_neon), + TestFuncs(aom_highbd_obmc_sad64x32_c, aom_highbd_obmc_sad64x32_neon), + TestFuncs(aom_highbd_obmc_sad32x64_c, aom_highbd_obmc_sad32x64_neon), + TestFuncs(aom_highbd_obmc_sad32x32_c, aom_highbd_obmc_sad32x32_neon), + TestFuncs(aom_highbd_obmc_sad32x16_c, aom_highbd_obmc_sad32x16_neon), + TestFuncs(aom_highbd_obmc_sad16x32_c, aom_highbd_obmc_sad16x32_neon), + TestFuncs(aom_highbd_obmc_sad16x16_c, aom_highbd_obmc_sad16x16_neon), + TestFuncs(aom_highbd_obmc_sad16x8_c, aom_highbd_obmc_sad16x8_neon), + TestFuncs(aom_highbd_obmc_sad8x16_c, aom_highbd_obmc_sad8x16_neon), + TestFuncs(aom_highbd_obmc_sad8x8_c, aom_highbd_obmc_sad8x8_neon), + TestFuncs(aom_highbd_obmc_sad8x4_c, aom_highbd_obmc_sad8x4_neon), + TestFuncs(aom_highbd_obmc_sad4x8_c, aom_highbd_obmc_sad4x8_neon), + TestFuncs(aom_highbd_obmc_sad4x4_c, aom_highbd_obmc_sad4x4_neon), +#if 
!CONFIG_REALTIME_ONLY + TestFuncs(aom_highbd_obmc_sad64x16_c, aom_highbd_obmc_sad64x16_neon), + TestFuncs(aom_highbd_obmc_sad16x64_c, aom_highbd_obmc_sad16x64_neon), + TestFuncs(aom_highbd_obmc_sad32x8_c, aom_highbd_obmc_sad32x8_neon), + TestFuncs(aom_highbd_obmc_sad8x32_c, aom_highbd_obmc_sad8x32_neon), + TestFuncs(aom_highbd_obmc_sad16x4_c, aom_highbd_obmc_sad16x4_neon), + TestFuncs(aom_highbd_obmc_sad4x16_c, aom_highbd_obmc_sad4x16_neon), +#endif // !CONFIG_REALTIME_ONLY +}; + +INSTANTIATE_TEST_SUITE_P(NEON, ObmcSadHBDTest, + ::testing::ValuesIn(neon_functions_hbd)); +#endif // HAVE_NEON + +#if HAVE_SSE4_1 +ObmcSadHBDTest::ParamType sse4_functions_hbd[] = { + TestFuncs(aom_highbd_obmc_sad128x128_c, aom_highbd_obmc_sad128x128_sse4_1), + TestFuncs(aom_highbd_obmc_sad128x64_c, aom_highbd_obmc_sad128x64_sse4_1), + TestFuncs(aom_highbd_obmc_sad64x128_c, aom_highbd_obmc_sad64x128_sse4_1), + TestFuncs(aom_highbd_obmc_sad64x64_c, aom_highbd_obmc_sad64x64_sse4_1), + TestFuncs(aom_highbd_obmc_sad64x32_c, aom_highbd_obmc_sad64x32_sse4_1), + TestFuncs(aom_highbd_obmc_sad32x64_c, aom_highbd_obmc_sad32x64_sse4_1), + TestFuncs(aom_highbd_obmc_sad32x32_c, aom_highbd_obmc_sad32x32_sse4_1), + TestFuncs(aom_highbd_obmc_sad32x16_c, aom_highbd_obmc_sad32x16_sse4_1), + TestFuncs(aom_highbd_obmc_sad16x32_c, aom_highbd_obmc_sad16x32_sse4_1), + TestFuncs(aom_highbd_obmc_sad16x16_c, aom_highbd_obmc_sad16x16_sse4_1), + TestFuncs(aom_highbd_obmc_sad16x8_c, aom_highbd_obmc_sad16x8_sse4_1), + TestFuncs(aom_highbd_obmc_sad8x16_c, aom_highbd_obmc_sad8x16_sse4_1), + TestFuncs(aom_highbd_obmc_sad8x8_c, aom_highbd_obmc_sad8x8_sse4_1), + TestFuncs(aom_highbd_obmc_sad8x4_c, aom_highbd_obmc_sad8x4_sse4_1), + TestFuncs(aom_highbd_obmc_sad4x8_c, aom_highbd_obmc_sad4x8_sse4_1), + TestFuncs(aom_highbd_obmc_sad4x4_c, aom_highbd_obmc_sad4x4_sse4_1), + + TestFuncs(aom_highbd_obmc_sad64x16_c, aom_highbd_obmc_sad64x16_sse4_1), + TestFuncs(aom_highbd_obmc_sad16x64_c, aom_highbd_obmc_sad16x64_sse4_1), + 
TestFuncs(aom_highbd_obmc_sad32x8_c, aom_highbd_obmc_sad32x8_sse4_1), + TestFuncs(aom_highbd_obmc_sad8x32_c, aom_highbd_obmc_sad8x32_sse4_1), + TestFuncs(aom_highbd_obmc_sad16x4_c, aom_highbd_obmc_sad16x4_sse4_1), + TestFuncs(aom_highbd_obmc_sad4x16_c, aom_highbd_obmc_sad4x16_sse4_1), +}; + +INSTANTIATE_TEST_SUITE_P(SSE4_1, ObmcSadHBDTest, + ::testing::ValuesIn(sse4_functions_hbd)); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 +ObmcSadHBDTest::ParamType avx2_functions_hbd[] = { + TestFuncs(aom_highbd_obmc_sad128x128_c, aom_highbd_obmc_sad128x128_avx2), + TestFuncs(aom_highbd_obmc_sad128x64_c, aom_highbd_obmc_sad128x64_avx2), + TestFuncs(aom_highbd_obmc_sad64x128_c, aom_highbd_obmc_sad64x128_avx2), + TestFuncs(aom_highbd_obmc_sad64x64_c, aom_highbd_obmc_sad64x64_avx2), + TestFuncs(aom_highbd_obmc_sad64x32_c, aom_highbd_obmc_sad64x32_avx2), + TestFuncs(aom_highbd_obmc_sad32x64_c, aom_highbd_obmc_sad32x64_avx2), + TestFuncs(aom_highbd_obmc_sad32x32_c, aom_highbd_obmc_sad32x32_avx2), + TestFuncs(aom_highbd_obmc_sad32x16_c, aom_highbd_obmc_sad32x16_avx2), + TestFuncs(aom_highbd_obmc_sad16x32_c, aom_highbd_obmc_sad16x32_avx2), + TestFuncs(aom_highbd_obmc_sad16x16_c, aom_highbd_obmc_sad16x16_avx2), + TestFuncs(aom_highbd_obmc_sad16x8_c, aom_highbd_obmc_sad16x8_avx2), + TestFuncs(aom_highbd_obmc_sad8x16_c, aom_highbd_obmc_sad8x16_avx2), + TestFuncs(aom_highbd_obmc_sad8x8_c, aom_highbd_obmc_sad8x8_avx2), + TestFuncs(aom_highbd_obmc_sad8x4_c, aom_highbd_obmc_sad8x4_avx2), + TestFuncs(aom_highbd_obmc_sad4x8_c, aom_highbd_obmc_sad4x8_avx2), + TestFuncs(aom_highbd_obmc_sad4x4_c, aom_highbd_obmc_sad4x4_avx2), + + TestFuncs(aom_highbd_obmc_sad64x16_c, aom_highbd_obmc_sad64x16_avx2), + TestFuncs(aom_highbd_obmc_sad16x64_c, aom_highbd_obmc_sad16x64_avx2), + TestFuncs(aom_highbd_obmc_sad32x8_c, aom_highbd_obmc_sad32x8_avx2), + TestFuncs(aom_highbd_obmc_sad8x32_c, aom_highbd_obmc_sad8x32_avx2), + TestFuncs(aom_highbd_obmc_sad16x4_c, aom_highbd_obmc_sad16x4_avx2), + 
TestFuncs(aom_highbd_obmc_sad4x16_c, aom_highbd_obmc_sad4x16_avx2), +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, ObmcSadHBDTest, + ::testing::ValuesIn(avx2_functions_hbd)); +#endif // HAVE_AVX2 +#endif // CONFIG_AV1_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/obmc_variance_test.cc b/third_party/aom/test/obmc_variance_test.cc new file mode 100644 index 0000000000..5f21a8a6c1 --- /dev/null +++ b/third_party/aom/test/obmc_variance_test.cc @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" + +#include "test/function_equivalence_test.h" +#include "test/register_state_check.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom/aom_integer.h" + +#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE) + +using libaom_test::ACMRandom; +using libaom_test::FunctionEquivalenceTest; + +namespace { + +static const int kIterations = 1000; +static const int kMaskMax = 64; + +typedef unsigned int (*ObmcVarF)(const uint8_t *pre, int pre_stride, + const int32_t *wsrc, const int32_t *mask, + unsigned int *sse); +typedef libaom_test::FuncParam<ObmcVarF> TestFuncs; + +//////////////////////////////////////////////////////////////////////////////// +// 8 bit +//////////////////////////////////////////////////////////////////////////////// + +class ObmcVarianceTest : public FunctionEquivalenceTest<ObmcVarF> {}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ObmcVarianceTest); + +TEST_P(ObmcVarianceTest, RandomValues) { + DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + const int pre_stride = this->rng_(MAX_SB_SIZE + 1); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = this->rng_.Rand8(); + wsrc[i] = this->rng_.Rand8() * this->rng_(kMaskMax * kMaskMax + 1); + mask[i] = this->rng_(kMaskMax * kMaskMax + 1); + } + + unsigned int ref_sse, tst_sse; + const unsigned int ref_res = + params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse); + unsigned int tst_res; + API_REGISTER_STATE_CHECK( + tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse)); + + ASSERT_EQ(ref_res, tst_res); + ASSERT_EQ(ref_sse, tst_sse); + } +} + +TEST_P(ObmcVarianceTest, ExtremeValues) { + DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, 
wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) { + const int pre_stride = iter; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = UINT8_MAX; + wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax; + mask[i] = kMaskMax * kMaskMax; + } + + unsigned int ref_sse, tst_sse; + const unsigned int ref_res = + params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse); + unsigned int tst_res; + API_REGISTER_STATE_CHECK( + tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse)); + + ASSERT_EQ(ref_res, tst_res); + ASSERT_EQ(ref_sse, tst_sse); + } +} + +TEST_P(ObmcVarianceTest, DISABLED_Speed) { + DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + const int pre_stride = this->rng_(MAX_SB_SIZE + 1); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = this->rng_.Rand8(); + wsrc[i] = this->rng_.Rand8() * this->rng_(kMaskMax * kMaskMax + 1); + mask[i] = this->rng_(kMaskMax * kMaskMax + 1); + } + + const int num_loops = 1000000; + unsigned int ref_sse, tst_sse; + aom_usec_timer ref_timer, test_timer; + + aom_usec_timer_start(&ref_timer); + for (int i = 0; i < num_loops; ++i) { + params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse); + } + aom_usec_timer_mark(&ref_timer); + const int elapsed_time_c = + static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int i = 0; i < num_loops; ++i) { + params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse); + } + aom_usec_timer_mark(&test_timer); + const int elapsed_time_simd = + static_cast<int>(aom_usec_timer_elapsed(&test_timer)); + + printf("c_time=%d \t simd_time=%d \t gain=%f \n", elapsed_time_c, + elapsed_time_simd, + static_cast<double>(elapsed_time_c) / elapsed_time_simd); +} + +#if HAVE_SSE4_1 +const ObmcVarianceTest::ParamType sse4_functions[] = { + 
TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_sse4_1), + TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_sse4_1), + TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_sse4_1), + TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_sse4_1), + TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_sse4_1), + TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_sse4_1), + TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_sse4_1), + TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_sse4_1), + TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_sse4_1), + TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_sse4_1), + TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_sse4_1), + TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_sse4_1), + TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_sse4_1), + TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_sse4_1), + TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_sse4_1), + TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_sse4_1), + + TestFuncs(aom_obmc_variance64x16_c, aom_obmc_variance64x16_sse4_1), + TestFuncs(aom_obmc_variance16x64_c, aom_obmc_variance16x64_sse4_1), + TestFuncs(aom_obmc_variance32x8_c, aom_obmc_variance32x8_sse4_1), + TestFuncs(aom_obmc_variance8x32_c, aom_obmc_variance8x32_sse4_1), + TestFuncs(aom_obmc_variance16x4_c, aom_obmc_variance16x4_sse4_1), + TestFuncs(aom_obmc_variance4x16_c, aom_obmc_variance4x16_sse4_1), +}; + +INSTANTIATE_TEST_SUITE_P(SSE4_1, ObmcVarianceTest, + ::testing::ValuesIn(sse4_functions)); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 +const ObmcVarianceTest::ParamType avx2_functions[] = { + TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_avx2), + TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_avx2), + TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_avx2), + TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_avx2), + 
TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_avx2), + TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_avx2), + TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_avx2), + TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_avx2), + TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_avx2), + TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_avx2), + TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_avx2), + TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_avx2), + TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_avx2), + TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_avx2), + TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_avx2), + TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_avx2), + + TestFuncs(aom_obmc_variance64x16_c, aom_obmc_variance64x16_avx2), + TestFuncs(aom_obmc_variance16x64_c, aom_obmc_variance16x64_avx2), + TestFuncs(aom_obmc_variance32x8_c, aom_obmc_variance32x8_avx2), + TestFuncs(aom_obmc_variance8x32_c, aom_obmc_variance8x32_avx2), + TestFuncs(aom_obmc_variance16x4_c, aom_obmc_variance16x4_avx2), + TestFuncs(aom_obmc_variance4x16_c, aom_obmc_variance4x16_avx2), +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, ObmcVarianceTest, + ::testing::ValuesIn(avx2_functions)); +#endif // HAVE_AVX2 + +#if HAVE_NEON +const ObmcVarianceTest::ParamType neon_functions[] = { + TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_neon), + TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_neon), + TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_neon), + TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_neon), + TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_neon), + TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_neon), + TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_neon), + TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_neon), + TestFuncs(aom_obmc_variance16x32_c, 
aom_obmc_variance16x32_neon), + TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_neon), + TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_neon), + TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_neon), + TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_neon), + TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_neon), + TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_neon), + TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_neon), + + TestFuncs(aom_obmc_variance64x16_c, aom_obmc_variance64x16_neon), + TestFuncs(aom_obmc_variance16x64_c, aom_obmc_variance16x64_neon), + TestFuncs(aom_obmc_variance32x8_c, aom_obmc_variance32x8_neon), + TestFuncs(aom_obmc_variance8x32_c, aom_obmc_variance8x32_neon), + TestFuncs(aom_obmc_variance16x4_c, aom_obmc_variance16x4_neon), + TestFuncs(aom_obmc_variance4x16_c, aom_obmc_variance4x16_neon), +}; + +INSTANTIATE_TEST_SUITE_P(NEON, ObmcVarianceTest, + ::testing::ValuesIn(neon_functions)); +#endif // HAVE_NEON + +//////////////////////////////////////////////////////////////////////////////// +// High bit-depth +//////////////////////////////////////////////////////////////////////////////// +#if CONFIG_AV1_HIGHBITDEPTH && !CONFIG_REALTIME_ONLY +class ObmcVarianceHBDTest : public FunctionEquivalenceTest<ObmcVarF> {}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ObmcVarianceHBDTest); + +TEST_P(ObmcVarianceHBDTest, RandomValues) { + DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + const int pre_stride = this->rng_(MAX_SB_SIZE + 1); + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = this->rng_(1 << params_.bit_depth); + wsrc[i] = this->rng_(1 << params_.bit_depth) * + this->rng_(kMaskMax * kMaskMax + 1); + mask[i] = this->rng_(kMaskMax * kMaskMax + 1); + } + + unsigned int ref_sse, tst_sse; + const 
unsigned int ref_res = params_.ref_func( + CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse); + unsigned int tst_res; + API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre), + pre_stride, wsrc, mask, + &tst_sse)); + + ASSERT_EQ(ref_res, tst_res); + ASSERT_EQ(ref_sse, tst_sse); + } +} + +TEST_P(ObmcVarianceHBDTest, ExtremeValues) { + DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]); + + for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) { + const int pre_stride = iter; + + for (int i = 0; i < MAX_SB_SQUARE; ++i) { + pre[i] = (1 << params_.bit_depth) - 1; + wsrc[i] = ((1 << params_.bit_depth) - 1) * kMaskMax * kMaskMax; + mask[i] = kMaskMax * kMaskMax; + } + + unsigned int ref_sse, tst_sse; + const unsigned int ref_res = params_.ref_func( + CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse); + unsigned int tst_res; + API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre), + pre_stride, wsrc, mask, + &tst_sse)); + + ASSERT_EQ(ref_res, tst_res); + ASSERT_EQ(ref_sse, tst_sse); + } +} + +#if HAVE_NEON +ObmcVarianceHBDTest::ParamType neon_functions_hbd[] = { + TestFuncs(aom_highbd_8_obmc_variance128x128_c, + aom_highbd_8_obmc_variance128x128_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance128x64_c, + aom_highbd_8_obmc_variance128x64_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance64x128_c, + aom_highbd_8_obmc_variance64x128_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance64x64_c, + aom_highbd_8_obmc_variance64x64_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance64x32_c, + aom_highbd_8_obmc_variance64x32_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance32x64_c, + aom_highbd_8_obmc_variance32x64_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance32x32_c, + aom_highbd_8_obmc_variance32x32_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance32x16_c, + aom_highbd_8_obmc_variance32x16_neon, 8), + 
TestFuncs(aom_highbd_8_obmc_variance16x32_c, + aom_highbd_8_obmc_variance16x32_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance16x16_c, + aom_highbd_8_obmc_variance16x16_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance16x8_c, + aom_highbd_8_obmc_variance16x8_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance8x16_c, + aom_highbd_8_obmc_variance8x16_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance8x8_c, aom_highbd_8_obmc_variance8x8_neon, + 8), + TestFuncs(aom_highbd_8_obmc_variance8x4_c, aom_highbd_8_obmc_variance8x4_neon, + 8), + TestFuncs(aom_highbd_8_obmc_variance4x8_c, aom_highbd_8_obmc_variance4x8_neon, + 8), + TestFuncs(aom_highbd_8_obmc_variance4x4_c, aom_highbd_8_obmc_variance4x4_neon, + 8), + TestFuncs(aom_highbd_10_obmc_variance128x128_c, + aom_highbd_10_obmc_variance128x128_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance128x64_c, + aom_highbd_10_obmc_variance128x64_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance64x128_c, + aom_highbd_10_obmc_variance64x128_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance64x64_c, + aom_highbd_10_obmc_variance64x64_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance64x32_c, + aom_highbd_10_obmc_variance64x32_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance32x64_c, + aom_highbd_10_obmc_variance32x64_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance32x32_c, + aom_highbd_10_obmc_variance32x32_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance32x16_c, + aom_highbd_10_obmc_variance32x16_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance16x32_c, + aom_highbd_10_obmc_variance16x32_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance16x16_c, + aom_highbd_10_obmc_variance16x16_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance16x8_c, + aom_highbd_10_obmc_variance16x8_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance8x16_c, + aom_highbd_10_obmc_variance8x16_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance8x8_c, + aom_highbd_10_obmc_variance8x8_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance8x4_c, + 
aom_highbd_10_obmc_variance8x4_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance4x8_c, + aom_highbd_10_obmc_variance4x8_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance4x4_c, + aom_highbd_10_obmc_variance4x4_neon, 10), + TestFuncs(aom_highbd_12_obmc_variance128x128_c, + aom_highbd_12_obmc_variance128x128_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance128x64_c, + aom_highbd_12_obmc_variance128x64_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance64x128_c, + aom_highbd_12_obmc_variance64x128_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance64x64_c, + aom_highbd_12_obmc_variance64x64_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance64x32_c, + aom_highbd_12_obmc_variance64x32_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance32x64_c, + aom_highbd_12_obmc_variance32x64_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance32x32_c, + aom_highbd_12_obmc_variance32x32_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance32x16_c, + aom_highbd_12_obmc_variance32x16_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance16x32_c, + aom_highbd_12_obmc_variance16x32_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance16x16_c, + aom_highbd_12_obmc_variance16x16_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance16x8_c, + aom_highbd_12_obmc_variance16x8_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance8x16_c, + aom_highbd_12_obmc_variance8x16_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance8x8_c, + aom_highbd_12_obmc_variance8x8_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance8x4_c, + aom_highbd_12_obmc_variance8x4_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance4x8_c, + aom_highbd_12_obmc_variance4x8_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance4x4_c, + aom_highbd_12_obmc_variance4x4_neon, 12), + TestFuncs(aom_highbd_8_obmc_variance64x16_c, + aom_highbd_8_obmc_variance64x16_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance16x64_c, + aom_highbd_8_obmc_variance16x64_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance32x8_c, + aom_highbd_8_obmc_variance32x8_neon, 8), + 
TestFuncs(aom_highbd_8_obmc_variance8x32_c, + aom_highbd_8_obmc_variance8x32_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance16x4_c, + aom_highbd_8_obmc_variance16x4_neon, 8), + TestFuncs(aom_highbd_8_obmc_variance4x16_c, + aom_highbd_8_obmc_variance4x16_neon, 8), + TestFuncs(aom_highbd_10_obmc_variance64x16_c, + aom_highbd_10_obmc_variance64x16_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance16x64_c, + aom_highbd_10_obmc_variance16x64_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance32x8_c, + aom_highbd_10_obmc_variance32x8_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance8x32_c, + aom_highbd_10_obmc_variance8x32_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance16x4_c, + aom_highbd_10_obmc_variance16x4_neon, 10), + TestFuncs(aom_highbd_10_obmc_variance4x16_c, + aom_highbd_10_obmc_variance4x16_neon, 10), + TestFuncs(aom_highbd_12_obmc_variance64x16_c, + aom_highbd_12_obmc_variance64x16_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance16x64_c, + aom_highbd_12_obmc_variance16x64_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance32x8_c, + aom_highbd_12_obmc_variance32x8_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance8x32_c, + aom_highbd_12_obmc_variance8x32_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance16x4_c, + aom_highbd_12_obmc_variance16x4_neon, 12), + TestFuncs(aom_highbd_12_obmc_variance4x16_c, + aom_highbd_12_obmc_variance4x16_neon, 12), +}; + +INSTANTIATE_TEST_SUITE_P(NEON, ObmcVarianceHBDTest, + ::testing::ValuesIn(neon_functions_hbd)); +#endif // HAVE_NEON + +#if HAVE_SSE4_1 +ObmcVarianceHBDTest::ParamType sse4_functions_hbd[] = { + TestFuncs(aom_highbd_8_obmc_variance128x128_c, + aom_highbd_8_obmc_variance128x128_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance128x64_c, + aom_highbd_8_obmc_variance128x64_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance64x128_c, + aom_highbd_8_obmc_variance64x128_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance64x64_c, + aom_highbd_8_obmc_variance64x64_sse4_1, 8), + 
TestFuncs(aom_highbd_8_obmc_variance64x32_c, + aom_highbd_8_obmc_variance64x32_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance32x64_c, + aom_highbd_8_obmc_variance32x64_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance32x32_c, + aom_highbd_8_obmc_variance32x32_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance32x16_c, + aom_highbd_8_obmc_variance32x16_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance16x32_c, + aom_highbd_8_obmc_variance16x32_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance16x16_c, + aom_highbd_8_obmc_variance16x16_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance16x8_c, + aom_highbd_8_obmc_variance16x8_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance8x16_c, + aom_highbd_8_obmc_variance8x16_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance8x8_c, + aom_highbd_8_obmc_variance8x8_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance8x4_c, + aom_highbd_8_obmc_variance8x4_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance4x8_c, + aom_highbd_8_obmc_variance4x8_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance4x4_c, + aom_highbd_8_obmc_variance4x4_sse4_1, 8), + TestFuncs(aom_highbd_10_obmc_variance128x128_c, + aom_highbd_10_obmc_variance128x128_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance128x64_c, + aom_highbd_10_obmc_variance128x64_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance64x128_c, + aom_highbd_10_obmc_variance64x128_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance64x64_c, + aom_highbd_10_obmc_variance64x64_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance64x32_c, + aom_highbd_10_obmc_variance64x32_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance32x64_c, + aom_highbd_10_obmc_variance32x64_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance32x32_c, + aom_highbd_10_obmc_variance32x32_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance32x16_c, + aom_highbd_10_obmc_variance32x16_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance16x32_c, + aom_highbd_10_obmc_variance16x32_sse4_1, 10), + 
TestFuncs(aom_highbd_10_obmc_variance16x16_c, + aom_highbd_10_obmc_variance16x16_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance16x8_c, + aom_highbd_10_obmc_variance16x8_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance8x16_c, + aom_highbd_10_obmc_variance8x16_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance8x8_c, + aom_highbd_10_obmc_variance8x8_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance8x4_c, + aom_highbd_10_obmc_variance8x4_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance4x8_c, + aom_highbd_10_obmc_variance4x8_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance4x4_c, + aom_highbd_10_obmc_variance4x4_sse4_1, 10), + TestFuncs(aom_highbd_12_obmc_variance128x128_c, + aom_highbd_12_obmc_variance128x128_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance128x64_c, + aom_highbd_12_obmc_variance128x64_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance64x128_c, + aom_highbd_12_obmc_variance64x128_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance64x64_c, + aom_highbd_12_obmc_variance64x64_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance64x32_c, + aom_highbd_12_obmc_variance64x32_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance32x64_c, + aom_highbd_12_obmc_variance32x64_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance32x32_c, + aom_highbd_12_obmc_variance32x32_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance32x16_c, + aom_highbd_12_obmc_variance32x16_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance16x32_c, + aom_highbd_12_obmc_variance16x32_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance16x16_c, + aom_highbd_12_obmc_variance16x16_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance16x8_c, + aom_highbd_12_obmc_variance16x8_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance8x16_c, + aom_highbd_12_obmc_variance8x16_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance8x8_c, + aom_highbd_12_obmc_variance8x8_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance8x4_c, + aom_highbd_12_obmc_variance8x4_sse4_1, 12), + 
TestFuncs(aom_highbd_12_obmc_variance4x8_c, + aom_highbd_12_obmc_variance4x8_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance4x4_c, + aom_highbd_12_obmc_variance4x4_sse4_1, 12), + + TestFuncs(aom_highbd_8_obmc_variance64x16_c, + aom_highbd_8_obmc_variance64x16_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance16x64_c, + aom_highbd_8_obmc_variance16x64_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance32x8_c, + aom_highbd_8_obmc_variance32x8_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance8x32_c, + aom_highbd_8_obmc_variance8x32_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance16x4_c, + aom_highbd_8_obmc_variance16x4_sse4_1, 8), + TestFuncs(aom_highbd_8_obmc_variance4x16_c, + aom_highbd_8_obmc_variance4x16_sse4_1, 8), + TestFuncs(aom_highbd_10_obmc_variance64x16_c, + aom_highbd_10_obmc_variance64x16_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance16x64_c, + aom_highbd_10_obmc_variance16x64_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance32x8_c, + aom_highbd_10_obmc_variance32x8_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance8x32_c, + aom_highbd_10_obmc_variance8x32_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance16x4_c, + aom_highbd_10_obmc_variance16x4_sse4_1, 10), + TestFuncs(aom_highbd_10_obmc_variance4x16_c, + aom_highbd_10_obmc_variance4x16_sse4_1, 10), + TestFuncs(aom_highbd_12_obmc_variance64x16_c, + aom_highbd_12_obmc_variance64x16_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance16x64_c, + aom_highbd_12_obmc_variance16x64_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance32x8_c, + aom_highbd_12_obmc_variance32x8_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance8x32_c, + aom_highbd_12_obmc_variance8x32_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance16x4_c, + aom_highbd_12_obmc_variance16x4_sse4_1, 12), + TestFuncs(aom_highbd_12_obmc_variance4x16_c, + aom_highbd_12_obmc_variance4x16_sse4_1, 12), +}; + +INSTANTIATE_TEST_SUITE_P(SSE4_1, ObmcVarianceHBDTest, + ::testing::ValuesIn(sse4_functions_hbd)); +#endif // HAVE_SSE4_1 
+#endif // CONFIG_AV1_HIGHBITDEPTH && !CONFIG_REALTIME_ONLY +} // namespace diff --git a/third_party/aom/test/pickrst_test.cc b/third_party/aom/test/pickrst_test.cc new file mode 100644 index 0000000000..04b6f45652 --- /dev/null +++ b/third_party/aom/test/pickrst_test.cc @@ -0,0 +1,750 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/register_state_check.h" +#include "test/acm_random.h" +#include "test/util.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom/aom_integer.h" +#include "aom_ports/aom_timer.h" +#include "av1/encoder/pickrst.h" + +#define MAX_DATA_BLOCK 384 + +namespace pickrst_test_lowbd { +static const int kIterations = 100; + +typedef int64_t (*lowbd_pixel_proj_error_func)( + const uint8_t *src8, int width, int height, int src_stride, + const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, + int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params); + +//////////////////////////////////////////////////////////////////////////////// +// 8 bit +//////////////////////////////////////////////////////////////////////////////// + +typedef std::tuple<const lowbd_pixel_proj_error_func> PixelProjErrorTestParam; + +class PixelProjErrorTest + : public ::testing::TestWithParam<PixelProjErrorTestParam> { + public: + void SetUp() override { + target_func_ = GET_PARAM(0); + src_ = (uint8_t 
*)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*src_))); + ASSERT_NE(src_, nullptr); + dgd_ = (uint8_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*dgd_))); + ASSERT_NE(dgd_, nullptr); + flt0_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*flt0_))); + ASSERT_NE(flt0_, nullptr); + flt1_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*flt1_))); + ASSERT_NE(flt1_, nullptr); + } + void TearDown() override { + aom_free(src_); + aom_free(dgd_); + aom_free(flt0_); + aom_free(flt1_); + } + void RunPixelProjErrorTest(int32_t run_times); + void RunPixelProjErrorTest_ExtremeValues(); + + private: + lowbd_pixel_proj_error_func target_func_; + libaom_test::ACMRandom rng_; + uint8_t *src_; + uint8_t *dgd_; + int32_t *flt0_; + int32_t *flt1_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PixelProjErrorTest); + +void PixelProjErrorTest::RunPixelProjErrorTest(int32_t run_times) { + int h_end = run_times != 1 ? 128 : (rng_.Rand16() % MAX_DATA_BLOCK) + 1; + int v_end = run_times != 1 ? 128 : (rng_.Rand16() % MAX_DATA_BLOCK) + 1; + const int dgd_stride = MAX_DATA_BLOCK; + const int src_stride = MAX_DATA_BLOCK; + const int flt0_stride = MAX_DATA_BLOCK; + const int flt1_stride = MAX_DATA_BLOCK; + sgr_params_type params; + int xq[2]; + const int iters = run_times == 1 ? kIterations : 4; + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + int64_t err_ref = 0, err_test = 1; + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_[i] = rng_.Rand8(); + src_[i] = rng_.Rand8(); + flt0_[i] = rng_.Rand15Signed(); + flt1_[i] = rng_.Rand15Signed(); + } + xq[0] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS); + xq[1] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS); + params.r[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter % 2); + params.r[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter / 2); + params.s[0] = run_times == 1 ? 
(rng_.Rand8() % MAX_RADIUS) : (iter % 2); + params.s[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter / 2); + uint8_t *dgd = dgd_; + uint8_t *src = src_; + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + err_ref = av1_lowbd_pixel_proj_error_c(src, h_end, v_end, src_stride, dgd, + dgd_stride, flt0_, flt0_stride, + flt1_, flt1_stride, xq, &params); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + err_test = + target_func_(src, h_end, v_end, src_stride, dgd, dgd_stride, flt0_, + flt0_stride, flt1_, flt1_stride, xq, &params); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + if (run_times > 10) { + printf("r0 %d r1 %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", params.r[0], + params.r[1], h_end, v_end, time1, time2, time1 / time2); + } + ASSERT_EQ(err_ref, err_test); + } +} + +void PixelProjErrorTest::RunPixelProjErrorTest_ExtremeValues() { + const int h_start = 0; + int h_end = 192; + const int v_start = 0; + int v_end = 192; + const int dgd_stride = MAX_DATA_BLOCK; + const int src_stride = MAX_DATA_BLOCK; + const int flt0_stride = MAX_DATA_BLOCK; + const int flt1_stride = MAX_DATA_BLOCK; + sgr_params_type params; + int xq[2]; + const int iters = kIterations; + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + int64_t err_ref = 0, err_test = 1; + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_[i] = 0; + src_[i] = 255; + flt0_[i] = rng_.Rand15Signed(); + flt1_[i] = rng_.Rand15Signed(); + } + xq[0] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS); + xq[1] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS); + params.r[0] = rng_.Rand8() % MAX_RADIUS; + params.r[1] = rng_.Rand8() % MAX_RADIUS; + params.s[0] = rng_.Rand8() % MAX_RADIUS; + params.s[1] = rng_.Rand8() % MAX_RADIUS; + uint8_t *dgd = dgd_; + uint8_t *src =
src_; + + err_ref = av1_lowbd_pixel_proj_error_c( + src, h_end - h_start, v_end - v_start, src_stride, dgd, dgd_stride, + flt0_, flt0_stride, flt1_, flt1_stride, xq, &params); + + err_test = target_func_(src, h_end - h_start, v_end - v_start, src_stride, + dgd, dgd_stride, flt0_, flt0_stride, flt1_, + flt1_stride, xq, &params); + + ASSERT_EQ(err_ref, err_test); + } +} + +TEST_P(PixelProjErrorTest, RandomValues) { RunPixelProjErrorTest(1); } + +TEST_P(PixelProjErrorTest, ExtremeValues) { + RunPixelProjErrorTest_ExtremeValues(); +} + +TEST_P(PixelProjErrorTest, DISABLED_Speed) { RunPixelProjErrorTest(200000); } + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P(SSE4_1, PixelProjErrorTest, + ::testing::Values(av1_lowbd_pixel_proj_error_sse4_1)); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 + +INSTANTIATE_TEST_SUITE_P(AVX2, PixelProjErrorTest, + ::testing::Values(av1_lowbd_pixel_proj_error_avx2)); +#endif // HAVE_AVX2 + +#if HAVE_NEON + +INSTANTIATE_TEST_SUITE_P(NEON, PixelProjErrorTest, + ::testing::Values(av1_lowbd_pixel_proj_error_neon)); +#endif // HAVE_NEON + +} // namespace pickrst_test_lowbd + +#if CONFIG_AV1_HIGHBITDEPTH +namespace pickrst_test_highbd { +static const int kIterations = 100; + +typedef int64_t (*highbd_pixel_proj_error_func)( + const uint8_t *src8, int width, int height, int src_stride, + const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, + int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params); + +//////////////////////////////////////////////////////////////////////////////// +// High bit-depth +//////////////////////////////////////////////////////////////////////////////// + +typedef std::tuple<const highbd_pixel_proj_error_func> PixelProjErrorTestParam; + +class PixelProjHighbdErrorTest + : public ::testing::TestWithParam<PixelProjErrorTestParam> { + public: + void SetUp() override { + target_func_ = GET_PARAM(0); + src_ = + (uint16_t *)aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*src_)); + ASSERT_NE(src_, nullptr);
+ dgd_ = + (uint16_t *)aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*dgd_)); + ASSERT_NE(dgd_, nullptr); + flt0_ = + (int32_t *)aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*flt0_)); + ASSERT_NE(flt0_, nullptr); + flt1_ = + (int32_t *)aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*flt1_)); + ASSERT_NE(flt1_, nullptr); + } + void TearDown() override { + aom_free(src_); + aom_free(dgd_); + aom_free(flt0_); + aom_free(flt1_); + } + void RunPixelProjErrorTest(int32_t run_times); + void RunPixelProjErrorTest_ExtremeValues(); + + private: + highbd_pixel_proj_error_func target_func_; + libaom_test::ACMRandom rng_; + uint16_t *src_; + uint16_t *dgd_; + int32_t *flt0_; + int32_t *flt1_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PixelProjHighbdErrorTest); + +void PixelProjHighbdErrorTest::RunPixelProjErrorTest(int32_t run_times) { + int h_end = run_times != 1 ? 128 : (rng_.Rand16() % MAX_DATA_BLOCK) + 1; + int v_end = run_times != 1 ? 128 : (rng_.Rand16() % MAX_DATA_BLOCK) + 1; + const int dgd_stride = MAX_DATA_BLOCK; + const int src_stride = MAX_DATA_BLOCK; + const int flt0_stride = MAX_DATA_BLOCK; + const int flt1_stride = MAX_DATA_BLOCK; + sgr_params_type params; + int xq[2]; + const int iters = run_times == 1 ? kIterations : 4; + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + int64_t err_ref = 0, err_test = 1; + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_[i] = rng_.Rand16() % (1 << 12); + src_[i] = rng_.Rand16() % (1 << 12); + flt0_[i] = rng_.Rand15Signed(); + flt1_[i] = rng_.Rand15Signed(); + } + xq[0] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS); + xq[1] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS); + params.r[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter % 2); + params.r[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter / 2); + params.s[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter % 2); + params.s[1] = run_times == 1 ? 
(rng_.Rand8() % MAX_RADIUS) : (iter / 2); + uint8_t *dgd8 = CONVERT_TO_BYTEPTR(dgd_); + uint8_t *src8 = CONVERT_TO_BYTEPTR(src_); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + err_ref = av1_highbd_pixel_proj_error_c( + src8, h_end, v_end, src_stride, dgd8, dgd_stride, flt0_, flt0_stride, + flt1_, flt1_stride, xq, &params); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + err_test = + target_func_(src8, h_end, v_end, src_stride, dgd8, dgd_stride, flt0_, + flt0_stride, flt1_, flt1_stride, xq, &params); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + if (run_times > 10) { + printf("r0 %d r1 %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", params.r[0], + params.r[1], h_end, v_end, time1, time2, time1 / time2); + } + ASSERT_EQ(err_ref, err_test); + } +} + +void PixelProjHighbdErrorTest::RunPixelProjErrorTest_ExtremeValues() { + const int h_start = 0; + int h_end = 192; + const int v_start = 0; + int v_end = 192; + const int dgd_stride = MAX_DATA_BLOCK; + const int src_stride = MAX_DATA_BLOCK; + const int flt0_stride = MAX_DATA_BLOCK; + const int flt1_stride = MAX_DATA_BLOCK; + sgr_params_type params; + int xq[2]; + const int iters = kIterations; + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + int64_t err_ref = 0, err_test = 1; + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_[i] = 0; + src_[i] = (1 << 12) - 1; + flt0_[i] = rng_.Rand15Signed(); + flt1_[i] = rng_.Rand15Signed(); + } + xq[0] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS); + xq[1] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS); + params.r[0] = rng_.Rand8() % MAX_RADIUS; + params.r[1] = rng_.Rand8() % MAX_RADIUS; + params.s[0] = rng_.Rand8() % MAX_RADIUS; + params.s[1] = rng_.Rand8() % MAX_RADIUS; + uint8_t *dgd8 = CONVERT_TO_BYTEPTR(dgd_); +
uint8_t *src8 = CONVERT_TO_BYTEPTR(src_); + + err_ref = av1_highbd_pixel_proj_error_c( + src8, h_end - h_start, v_end - v_start, src_stride, dgd8, dgd_stride, + flt0_, flt0_stride, flt1_, flt1_stride, xq, &params); + + err_test = target_func_(src8, h_end - h_start, v_end - v_start, src_stride, + dgd8, dgd_stride, flt0_, flt0_stride, flt1_, + flt1_stride, xq, &params); + + ASSERT_EQ(err_ref, err_test); + } +} + +TEST_P(PixelProjHighbdErrorTest, RandomValues) { RunPixelProjErrorTest(1); } + +TEST_P(PixelProjHighbdErrorTest, ExtremeValues) { + RunPixelProjErrorTest_ExtremeValues(); +} + +TEST_P(PixelProjHighbdErrorTest, DISABLED_Speed) { + RunPixelProjErrorTest(200000); +} + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P(SSE4_1, PixelProjHighbdErrorTest, + ::testing::Values(av1_highbd_pixel_proj_error_sse4_1)); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 + +INSTANTIATE_TEST_SUITE_P(AVX2, PixelProjHighbdErrorTest, + ::testing::Values(av1_highbd_pixel_proj_error_avx2)); +#endif // HAVE_AVX2 + +#if HAVE_NEON + +INSTANTIATE_TEST_SUITE_P(NEON, PixelProjHighbdErrorTest, + ::testing::Values(av1_highbd_pixel_proj_error_neon)); +#endif // HAVE_NEON + +} // namespace pickrst_test_highbd +#endif // CONFIG_AV1_HIGHBITDEPTH + +//////////////////////////////////////////////////////////////////////////////// +// Get_proj_subspace_Test +//////////////////////////////////////////////////////////////////////////////// + +namespace get_proj_subspace_test_lowbd { +static const int kIterations = 100; + +typedef void (*set_get_proj_subspace)(const uint8_t *src8, int width, + int height, int src_stride, + const uint8_t *dat8, int dat_stride, + int32_t *flt0, int flt0_stride, + int32_t *flt1, int flt1_stride, + int64_t H[2][2], int64_t C[2], + const sgr_params_type *params); + +typedef std::tuple<const set_get_proj_subspace> GetProjSubspaceTestParam; + +class GetProjSubspaceTest + : public ::testing::TestWithParam<GetProjSubspaceTestParam> { + public: + void SetUp() override { + target_func_ = GET_PARAM(0);
+ src_ = (uint8_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*src_))); + ASSERT_NE(src_, nullptr); + dgd_ = (uint8_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*dgd_))); + ASSERT_NE(dgd_, nullptr); + flt0_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*flt0_))); + ASSERT_NE(flt0_, nullptr); + flt1_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*flt1_))); + ASSERT_NE(flt1_, nullptr); + } + void TearDown() override { + aom_free(src_); + aom_free(dgd_); + aom_free(flt0_); + aom_free(flt1_); + } + void RunGetProjSubspaceTest(int32_t run_times); + void RunGetProjSubspaceTest_ExtremeValues(); + + private: + set_get_proj_subspace target_func_; + libaom_test::ACMRandom rng_; + uint8_t *src_; + uint8_t *dgd_; + int32_t *flt0_; + int32_t *flt1_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GetProjSubspaceTest); + +void GetProjSubspaceTest::RunGetProjSubspaceTest(int32_t run_times) { + int h_end = run_times != 1 + ? 128 + : ((rng_.Rand16() % MAX_DATA_BLOCK) & + 2147483640); // We test for widths divisible by 8. + int v_end = + run_times != 1 ? 128 : ((rng_.Rand16() % MAX_DATA_BLOCK) & 2147483640); + const int dgd_stride = MAX_DATA_BLOCK; + const int src_stride = MAX_DATA_BLOCK; + const int flt0_stride = MAX_DATA_BLOCK; + const int flt1_stride = MAX_DATA_BLOCK; + sgr_params_type params; + const int iters = run_times == 1 ? kIterations : 3; + static constexpr int kR0[3] = { 1, 1, 0 }; + static constexpr int kR1[3] = { 1, 0, 1 }; + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + int64_t C_ref[2] = { 0 }, C_test[2] = { 0 }; + int64_t H_ref[2][2] = { { 0, 0 }, { 0, 0 } }; + int64_t H_test[2][2] = { { 0, 0 }, { 0, 0 } }; + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_[i] = rng_.Rand8(); + src_[i] = rng_.Rand8(); + flt0_[i] = rng_.Rand15Signed(); + flt1_[i] = rng_.Rand15Signed(); + } + + params.r[0] = run_times == 1 ? 
(rng_.Rand8() % MAX_RADIUS) : kR0[iter]; + params.r[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : kR1[iter]; + uint8_t *dgd = dgd_; + uint8_t *src = src_; + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + av1_calc_proj_params_c(src, v_end, h_end, src_stride, dgd, dgd_stride, + flt0_, flt0_stride, flt1_, flt1_stride, H_ref, + C_ref, &params); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + target_func_(src, v_end, h_end, src_stride, dgd, dgd_stride, flt0_, + flt0_stride, flt1_, flt1_stride, H_test, C_test, &params); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + if (run_times > 10) { + printf("r0 %d r1 %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", params.r[0], + params.r[1], h_end, v_end, time1, time2, time1 / time2); + } else { + ASSERT_EQ(H_ref[0][0], H_test[0][0]); + ASSERT_EQ(H_ref[0][1], H_test[0][1]); + ASSERT_EQ(H_ref[1][0], H_test[1][0]); + ASSERT_EQ(H_ref[1][1], H_test[1][1]); + ASSERT_EQ(C_ref[0], C_test[0]); + ASSERT_EQ(C_ref[1], C_test[1]); + } + } +} + +void GetProjSubspaceTest::RunGetProjSubspaceTest_ExtremeValues() { + const int h_start = 0; + int h_end = MAX_DATA_BLOCK; + const int v_start = 0; + int v_end = MAX_DATA_BLOCK; + const int dgd_stride = MAX_DATA_BLOCK; + const int src_stride = MAX_DATA_BLOCK; + const int flt0_stride = MAX_DATA_BLOCK; + const int flt1_stride = MAX_DATA_BLOCK; + sgr_params_type params; + const int iters = kIterations; + static constexpr int kR0[3] = { 1, 1, 0 }; + static constexpr int kR1[3] = { 1, 0, 1 }; + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + int64_t C_ref[2] = { 0 }, C_test[2] = { 0 }; + int64_t H_ref[2][2] = { { 0, 0 }, { 0, 0 } }; + int64_t H_test[2][2] = { { 0, 0 }, { 0, 0 } }; + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_[i] =
0; + src_[i] = 255; + flt0_[i] = rng_.Rand15Signed(); + flt1_[i] = rng_.Rand15Signed(); + } + params.r[0] = kR0[iter % 3]; + params.r[1] = kR1[iter % 3]; + uint8_t *dgd = dgd_; + uint8_t *src = src_; + + av1_calc_proj_params_c(src, h_end - h_start, v_end - v_start, src_stride, + dgd, dgd_stride, flt0_, flt0_stride, flt1_, + flt1_stride, H_ref, C_ref, &params); + + target_func_(src, h_end - h_start, v_end - v_start, src_stride, dgd, + dgd_stride, flt0_, flt0_stride, flt1_, flt1_stride, H_test, + C_test, &params); + + ASSERT_EQ(H_ref[0][0], H_test[0][0]); + ASSERT_EQ(H_ref[0][1], H_test[0][1]); + ASSERT_EQ(H_ref[1][0], H_test[1][0]); + ASSERT_EQ(H_ref[1][1], H_test[1][1]); + ASSERT_EQ(C_ref[0], C_test[0]); + ASSERT_EQ(C_ref[1], C_test[1]); + } +} + +TEST_P(GetProjSubspaceTest, RandomValues) { RunGetProjSubspaceTest(1); } + +TEST_P(GetProjSubspaceTest, ExtremeValues) { + RunGetProjSubspaceTest_ExtremeValues(); +} + +TEST_P(GetProjSubspaceTest, DISABLED_Speed) { RunGetProjSubspaceTest(200000); } + +#if HAVE_SSE4_1 + +INSTANTIATE_TEST_SUITE_P(SSE4_1, GetProjSubspaceTest, + ::testing::Values(av1_calc_proj_params_sse4_1)); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 + +INSTANTIATE_TEST_SUITE_P(AVX2, GetProjSubspaceTest, + ::testing::Values(av1_calc_proj_params_avx2)); +#endif // HAVE_AVX2 + +#if HAVE_NEON + +INSTANTIATE_TEST_SUITE_P(NEON, GetProjSubspaceTest, + ::testing::Values(av1_calc_proj_params_neon)); +#endif // HAVE_NEON + +} // namespace get_proj_subspace_test_lowbd + +#if CONFIG_AV1_HIGHBITDEPTH +namespace get_proj_subspace_test_hbd { +static const int kIterations = 100; + +typedef void (*set_get_proj_subspace_hbd)(const uint8_t *src8, int width, + int height, int src_stride, + const uint8_t *dat8, int dat_stride, + int32_t *flt0, int flt0_stride, + int32_t *flt1, int flt1_stride, + int64_t H[2][2], int64_t C[2], + const sgr_params_type *params); + +typedef std::tuple<const set_get_proj_subspace_hbd> GetProjSubspaceHBDTestParam; + +class GetProjSubspaceTestHBD + : public
::testing::TestWithParam<GetProjSubspaceHBDTestParam> { + public: + void SetUp() override { + target_func_ = GET_PARAM(0); + src_ = (uint16_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*src_))); + ASSERT_NE(src_, nullptr); + dgd_ = (uint16_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*dgd_))); + ASSERT_NE(dgd_, nullptr); + flt0_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*flt0_))); + ASSERT_NE(flt0_, nullptr); + flt1_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK * + sizeof(*flt1_))); + ASSERT_NE(flt1_, nullptr); + } + void TearDown() override { + aom_free(src_); + aom_free(dgd_); + aom_free(flt0_); + aom_free(flt1_); + } + void RunGetProjSubspaceTestHBD(int32_t run_times); + void RunGetProjSubspaceTestHBD_ExtremeValues(); + + private: + set_get_proj_subspace_hbd target_func_; + libaom_test::ACMRandom rng_; + uint16_t *src_; + uint16_t *dgd_; + int32_t *flt0_; + int32_t *flt1_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GetProjSubspaceTestHBD); + +void GetProjSubspaceTestHBD::RunGetProjSubspaceTestHBD(int32_t run_times) { + int h_end = run_times != 1 + ? 128 + : ((rng_.Rand16() % MAX_DATA_BLOCK) & + 2147483640); // We test for widths divisible by 8. + int v_end = + run_times != 1 ? 128 : ((rng_.Rand16() % MAX_DATA_BLOCK) & 2147483640); + const int dgd_stride = MAX_DATA_BLOCK; + const int src_stride = MAX_DATA_BLOCK; + const int flt0_stride = MAX_DATA_BLOCK; + const int flt1_stride = MAX_DATA_BLOCK; + sgr_params_type params; + const int iters = run_times == 1 ? 
kIterations : 3; + static constexpr int kR0[3] = { 1, 1, 0 }; + static constexpr int kR1[3] = { 1, 0, 1 }; + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + int64_t C_ref[2] = { 0 }, C_test[2] = { 0 }; + int64_t H_ref[2][2] = { { 0, 0 }, { 0, 0 } }; + int64_t H_test[2][2] = { { 0, 0 }, { 0, 0 } }; + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_[i] = rng_.Rand16() % 4095; + src_[i] = rng_.Rand16() % 4095; + flt0_[i] = rng_.Rand15Signed(); + flt1_[i] = rng_.Rand15Signed(); + } + + params.r[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : kR0[iter]; + params.r[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : kR1[iter]; + uint8_t *dgd = CONVERT_TO_BYTEPTR(dgd_); + uint8_t *src = CONVERT_TO_BYTEPTR(src_); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + av1_calc_proj_params_high_bd_c(src, v_end, h_end, src_stride, dgd, + dgd_stride, flt0_, flt0_stride, flt1_, + flt1_stride, H_ref, C_ref, &params); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + target_func_(src, v_end, h_end, src_stride, dgd, dgd_stride, flt0_, + flt0_stride, flt1_, flt1_stride, H_test, C_test, &params); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + if (run_times > 10) { + printf("r0 %d r1 %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", params.r[0], + params.r[1], h_end, v_end, time1, time2, time1 / time2); + } else { + ASSERT_EQ(H_ref[0][0], H_test[0][0]); + ASSERT_EQ(H_ref[0][1], H_test[0][1]); + ASSERT_EQ(H_ref[1][0], H_test[1][0]); + ASSERT_EQ(H_ref[1][1], H_test[1][1]); + ASSERT_EQ(C_ref[0], C_test[0]); + ASSERT_EQ(C_ref[1], C_test[1]); + } + } +} + +void GetProjSubspaceTestHBD::RunGetProjSubspaceTestHBD_ExtremeValues() { + const int h_start = 0; + int h_end = MAX_DATA_BLOCK; + const int v_start = 0; + int v_end = 
MAX_DATA_BLOCK; + const int dgd_stride = MAX_DATA_BLOCK; + const int src_stride = MAX_DATA_BLOCK; + const int flt0_stride = MAX_DATA_BLOCK; + const int flt1_stride = MAX_DATA_BLOCK; + sgr_params_type params; + const int iters = kIterations; + static constexpr int kR0[3] = { 1, 1, 0 }; + static constexpr int kR1[3] = { 1, 0, 1 }; + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + int64_t C_ref[2] = { 0 }, C_test[2] = { 0 }; + int64_t H_ref[2][2] = { { 0, 0 }, { 0, 0 } }; + int64_t H_test[2][2] = { { 0, 0 }, { 0, 0 } }; + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_[i] = 0; + src_[i] = 4095; + flt0_[i] = rng_.Rand15Signed(); + flt1_[i] = rng_.Rand15Signed(); + } + params.r[0] = kR0[iter % 3]; + params.r[1] = kR1[iter % 3]; + uint8_t *dgd = CONVERT_TO_BYTEPTR(dgd_); + uint8_t *src = CONVERT_TO_BYTEPTR(src_); + + av1_calc_proj_params_high_bd_c( + src, h_end - h_start, v_end - v_start, src_stride, dgd, dgd_stride, + flt0_, flt0_stride, flt1_, flt1_stride, H_ref, C_ref, &params); + + target_func_(src, h_end - h_start, v_end - v_start, src_stride, dgd, + dgd_stride, flt0_, flt0_stride, flt1_, flt1_stride, H_test, + C_test, &params); + + ASSERT_EQ(H_ref[0][0], H_test[0][0]); + ASSERT_EQ(H_ref[0][1], H_test[0][1]); + ASSERT_EQ(H_ref[1][0], H_test[1][0]); + ASSERT_EQ(H_ref[1][1], H_test[1][1]); + ASSERT_EQ(C_ref[0], C_test[0]); + ASSERT_EQ(C_ref[1], C_test[1]); + } +} + +TEST_P(GetProjSubspaceTestHBD, RandomValues) { RunGetProjSubspaceTestHBD(1); } + +TEST_P(GetProjSubspaceTestHBD, ExtremeValues) { + RunGetProjSubspaceTestHBD_ExtremeValues(); +} + +TEST_P(GetProjSubspaceTestHBD, DISABLED_Speed) { + RunGetProjSubspaceTestHBD(200000); +} + +#if HAVE_SSE4_1 + +INSTANTIATE_TEST_SUITE_P( + SSE4_1, GetProjSubspaceTestHBD, + ::testing::Values(av1_calc_proj_params_high_bd_sse4_1)); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 + +INSTANTIATE_TEST_SUITE_P(AVX2, GetProjSubspaceTestHBD, + ::testing::Values(av1_calc_proj_params_high_bd_avx2)); +#endif // 
HAVE_AVX2 + +#if HAVE_NEON + +INSTANTIATE_TEST_SUITE_P(NEON, GetProjSubspaceTestHBD, + ::testing::Values(av1_calc_proj_params_high_bd_neon)); +#endif // HAVE_NEON +} // namespace get_proj_subspace_test_hbd + +#endif // CONFIG_AV1_HIGHBITDEPTH diff --git a/third_party/aom/test/postproc_filters_test.cc b/third_party/aom/test/postproc_filters_test.cc new file mode 100644 index 0000000000..9584dd8c35 --- /dev/null +++ b/third_party/aom/test/postproc_filters_test.cc @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2022, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/md5_helper.h" +#include "test/util.h" +#include "test/yuv_video_source.h" + +namespace { + +class PostprocFiltersTest + : public ::libaom_test::CodecTestWith2Params<int, unsigned int>, + public ::libaom_test::EncoderTest { + protected: + PostprocFiltersTest() + : EncoderTest(GET_PARAM(0)), set_skip_postproc_filtering_(false), + frame_number_(0), cpu_used_(GET_PARAM(1)), bd_(GET_PARAM(2)) {} + + void SetUp() override { + InitializeConfig(::libaom_test::kAllIntra); + cfg_.g_input_bit_depth = bd_; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + frame_number_ = video->frame(); + if (frame_number_ == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel); + } + if (set_skip_postproc_filtering_) { + if (frame_number_ == 0) { + encoder->Control(AV1E_SET_SKIP_POSTPROC_FILTERING, 1); + } else if (frame_number_ == 10) { + encoder->Control(AV1E_SET_SKIP_POSTPROC_FILTERING, 0); + } else if (frame_number_ == 20) { + encoder->Control(AV1E_SET_SKIP_POSTPROC_FILTERING, 1); + } + } + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + ::libaom_test::MD5 md5_enc; + md5_enc.Add(reinterpret_cast<uint8_t *>(pkt->data.frame.buf), + pkt->data.frame.sz); + md5_enc_.push_back(md5_enc.Get()); + } + + void PostEncodeFrameHook(::libaom_test::Encoder *encoder) override { + const aom_image_t *img_enc = encoder->GetPreviewFrame(); + if (!set_skip_postproc_filtering_) { + ASSERT_NE(img_enc, nullptr); + } else { + // Null will be returned if we query the reconstructed frame when + // AV1E_SET_SKIP_POSTPROC_FILTERING is set to 1. 
+ if (frame_number_ < 10) { + ASSERT_EQ(img_enc, nullptr); + } else if (frame_number_ < 20) { + // Reconstructed frame cannot be null when + // AV1E_SET_SKIP_POSTPROC_FILTERING is set to 0. + ASSERT_NE(img_enc, nullptr); + } else { + ASSERT_EQ(img_enc, nullptr); + } + } + } + + // The encoder config flag 'AV1E_SET_SKIP_POSTPROC_FILTERING' can be used to + // skip the application of post-processing filters on reconstructed frame for + // ALLINTRA encode. This unit-test validates the bit exactness of 2 encoded + // streams with 'AV1E_SET_SKIP_POSTPROC_FILTERING': + // 1. disabled for all frames (default case) + // 2. enabled and disabled at different frame indices using control calls. + void DoTest() { + std::unique_ptr<libaom_test::VideoSource> video( + new libaom_test::YUVVideoSource("niklas_640_480_30.yuv", + AOM_IMG_FMT_I420, 640, 480, 30, 1, 0, + kFrames)); + ASSERT_NE(video, nullptr); + + // First encode: 'AV1E_SET_SKIP_POSTPROC_FILTERING' disabled for all frames + // (default case). + set_skip_postproc_filtering_ = false; + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + std::vector<std::string> apply_postproc_filters_md5_enc = + std::move(md5_enc_); + md5_enc_.clear(); + + // Second encode: 'AV1E_SET_SKIP_POSTPROC_FILTERING' enabled and disabled at + // different frame intervals. + set_skip_postproc_filtering_ = true; + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + std::vector<std::string> toggle_apply_postproc_filters_md5_enc = + std::move(md5_enc_); + md5_enc_.clear(); + + // Check for bit match. 
+ ASSERT_EQ(apply_postproc_filters_md5_enc, + toggle_apply_postproc_filters_md5_enc); + } + + bool set_skip_postproc_filtering_; + unsigned int frame_number_; + std::vector<std::string> md5_enc_; + + private: + static constexpr int kFrames = 30; + static constexpr unsigned int kCqLevel = 18; + int cpu_used_; + unsigned int bd_; +}; + +class PostprocFiltersTestLarge : public PostprocFiltersTest {}; + +TEST_P(PostprocFiltersTest, MD5Match) { DoTest(); } + +TEST_P(PostprocFiltersTestLarge, MD5Match) { DoTest(); } + +AV1_INSTANTIATE_TEST_SUITE(PostprocFiltersTest, ::testing::Values(9), + ::testing::Values(8, 10)); + +// Test cpu_used 3 and 6. +AV1_INSTANTIATE_TEST_SUITE(PostprocFiltersTestLarge, ::testing::Values(3, 6), + ::testing::Values(8, 10)); + +} // namespace diff --git a/third_party/aom/test/quant_test.cc b/third_party/aom/test/quant_test.cc new file mode 100644 index 0000000000..afbabb3147 --- /dev/null +++ b/third_party/aom/test/quant_test.cc @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ +#include "config/aom_config.h" + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "av1/encoder/av1_quantize.h" +#include "test/y4m_video_source.h" + +namespace { + +const ::libaom_test::TestMode kTestMode[] = +#if CONFIG_REALTIME_ONLY + { ::libaom_test::kRealTime }; +#else + { ::libaom_test::kRealTime, ::libaom_test::kOnePassGood }; +#endif + +class QMTest + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>, + public ::libaom_test::EncoderTest { + protected: + QMTest() : EncoderTest(GET_PARAM(0)) {} + ~QMTest() override = default; + + void SetUp() override { + InitializeConfig(GET_PARAM(1)); + set_cpu_used_ = GET_PARAM(2); + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AV1E_SET_ENABLE_QM, 1); + encoder->Control(AV1E_SET_QM_MIN, qm_min_); + encoder->Control(AV1E_SET_QM_MAX, qm_max_); + + encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT, 100); + if (mode_ == ::libaom_test::kRealTime) { + encoder->Control(AV1E_SET_ALLOW_WARPED_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_OBMC, 0); + } + } + } + + void DoTest(int qm_min, int qm_max) { + qm_min_ = qm_min; + qm_max_ = qm_max; + cfg_.kf_max_dist = 12; + cfg_.rc_min_quantizer = 8; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 6; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_target_bitrate = 300; + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 15); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + int set_cpu_used_; + int qm_min_; + int qm_max_; +}; + +// encodes and decodes 
without a mismatch. +TEST_P(QMTest, TestNoMisMatchQM1) { DoTest(5, 9); } + +// encodes and decodes without a mismatch. +TEST_P(QMTest, TestNoMisMatchQM2) { DoTest(0, 8); } + +// encodes and decodes without a mismatch. +TEST_P(QMTest, TestNoMisMatchQM3) { DoTest(9, 15); } + +AV1_INSTANTIATE_TEST_SUITE(QMTest, ::testing::ValuesIn(kTestMode), + ::testing::Range(5, 9)); + +#if !CONFIG_REALTIME_ONLY +typedef struct { + const unsigned int min_q; + const unsigned int max_q; +} QuantParam; + +const QuantParam QuantTestParams[] = { + { 0, 10 }, { 0, 60 }, { 20, 35 }, { 35, 50 }, { 50, 63 } +}; + +std::ostream &operator<<(std::ostream &os, const QuantParam &test_arg) { + return os << "QuantParam { min_q:" << test_arg.min_q + << " max_q:" << test_arg.max_q << " }"; +} + +/* + * This class is used to test whether base_qindex is within min + * and max quantizer range configured by user. + */ +class QuantizerBoundsCheckTestLarge + : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, + QuantParam, aom_rc_mode>, + public ::libaom_test::EncoderTest { + protected: + QuantizerBoundsCheckTestLarge() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + quant_param_(GET_PARAM(2)), rc_end_usage_(GET_PARAM(3)) { + quant_bound_violated_ = false; + } + ~QuantizerBoundsCheckTestLarge() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = rc_end_usage_; + cfg_.g_threads = 1; + cfg_.rc_min_quantizer = quant_param_.min_q; + cfg_.rc_max_quantizer = quant_param_.max_q; + cfg_.g_lag_in_frames = 35; + if (rc_end_usage_ != AOM_Q) { + cfg_.rc_target_bitrate = 400; + } + } + + bool DoDecode() const override { return true; } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 5); + } + } + + bool HandleDecodeResult(const 
aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) override { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + if (AOM_CODEC_OK == res_dec) { + aom_codec_ctx_t *ctx_dec = decoder->GetDecoder(); + AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_LAST_QUANTIZER, + &base_qindex_); + min_bound_qindex_ = av1_quantizer_to_qindex(cfg_.rc_min_quantizer); + max_bound_qindex_ = av1_quantizer_to_qindex(cfg_.rc_max_quantizer); + if ((base_qindex_ < min_bound_qindex_ || + base_qindex_ > max_bound_qindex_) && + quant_bound_violated_ == false) { + quant_bound_violated_ = true; + } + } + return AOM_CODEC_OK == res_dec; + } + + ::libaom_test::TestMode encoding_mode_; + const QuantParam quant_param_; + int base_qindex_; + int min_bound_qindex_; + int max_bound_qindex_; + bool quant_bound_violated_; + aom_rc_mode rc_end_usage_; +}; + +TEST_P(QuantizerBoundsCheckTestLarge, QuantizerBoundsCheckEncodeTest) { + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, 50); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(quant_bound_violated_, false); +} + +AV1_INSTANTIATE_TEST_SUITE(QuantizerBoundsCheckTestLarge, + ::testing::Values(::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood), + ::testing::ValuesIn(QuantTestParams), + ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ)); +#endif // !CONFIG_REALTIME_ONLY +} // namespace diff --git a/third_party/aom/test/quantize_func_test.cc b/third_party/aom/test/quantize_func_test.cc new file mode 100644 index 0000000000..328d5b10df --- /dev/null +++ b/third_party/aom/test/quantize_func_test.cc @@ -0,0 +1,795 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <algorithm> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" +#include "config/av1_rtcd.h" + +#include "aom/aom_codec.h" +#include "aom_ports/aom_timer.h" +#include "av1/encoder/encoder.h" +#include "av1/common/scan.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +namespace { +using libaom_test::ACMRandom; + +#define QUAN_PARAM_LIST \ + const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, \ + const int16_t *round_ptr, const int16_t *quant_ptr, \ + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, \ + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, \ + const int16_t *scan, const int16_t *iscan + +#define LP_QUANTIZE_PARAM_LIST \ + const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, \ + const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, \ + const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, \ + const int16_t *iscan + +typedef void (*LPQuantizeFunc)(LP_QUANTIZE_PARAM_LIST); +typedef void (*QuantizeFunc)(QUAN_PARAM_LIST); +typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST, int log_scale); + +#undef LP_QUANTIZE_PARAM_LIST + +#define HBD_QUAN_FUNC \ + fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \ + qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, log_scale) + +#define LBD_QUAN_FUNC \ + fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \ + qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan) + +template 
<QuantizeFuncHbd fn> +void highbd_quan16x16_wrapper(QUAN_PARAM_LIST) { + const int log_scale = 0; + HBD_QUAN_FUNC; +} + +template <QuantizeFuncHbd fn> +void highbd_quan32x32_wrapper(QUAN_PARAM_LIST) { + const int log_scale = 1; + HBD_QUAN_FUNC; +} + +template <QuantizeFuncHbd fn> +void highbd_quan64x64_wrapper(QUAN_PARAM_LIST) { + const int log_scale = 2; + HBD_QUAN_FUNC; +} + +enum QuantType { TYPE_B, TYPE_DC, TYPE_FP }; + +using std::tuple; + +template <typename FuncType> +using QuantizeParam = + tuple<FuncType, FuncType, TX_SIZE, QuantType, aom_bit_depth_t>; + +typedef struct { + QUANTS quant; + Dequants dequant; +} QuanTable; + +const int kTestNum = 1000; + +#define GET_TEMPLATE_PARAM(k) std::get<k>(this->GetParam()) + +template <typename CoeffType, typename FuncType> +class QuantizeTestBase + : public ::testing::TestWithParam<QuantizeParam<FuncType>> { + protected: + QuantizeTestBase() + : quant_ref_(GET_TEMPLATE_PARAM(0)), quant_(GET_TEMPLATE_PARAM(1)), + tx_size_(GET_TEMPLATE_PARAM(2)), type_(GET_TEMPLATE_PARAM(3)), + bd_(GET_TEMPLATE_PARAM(4)) {} + + ~QuantizeTestBase() override = default; + + void SetUp() override { + qtab_ = reinterpret_cast<QuanTable *>(aom_memalign(32, sizeof(*qtab_))); + ASSERT_NE(qtab_, nullptr); + const int n_coeffs = coeff_num(); + coeff_ = reinterpret_cast<CoeffType *>( + aom_memalign(32, 6 * n_coeffs * sizeof(CoeffType))); + ASSERT_NE(coeff_, nullptr); + InitQuantizer(); + } + + void TearDown() override { + aom_free(qtab_); + qtab_ = nullptr; + aom_free(coeff_); + coeff_ = nullptr; + } + + void InitQuantizer() { + av1_build_quantizer(bd_, 0, 0, 0, 0, 0, &qtab_->quant, &qtab_->dequant); + } + + virtual void RunQuantizeFunc( + const CoeffType *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, CoeffType *qcoeff_ptr, + CoeffType *qcoeff_ref_ptr, CoeffType *dqcoeff_ptr, + CoeffType *dqcoeff_ref_ptr, const int16_t *dequant_ptr, + uint16_t 
*eob_ref_ptr, uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) = 0; + + void QuantizeRun(bool is_loop, int q = 0, int test_num = 1) { + CoeffType *coeff_ptr = coeff_; + const intptr_t n_coeffs = coeff_num(); + + CoeffType *qcoeff_ref = coeff_ptr + n_coeffs; + CoeffType *dqcoeff_ref = qcoeff_ref + n_coeffs; + + CoeffType *qcoeff = dqcoeff_ref + n_coeffs; + CoeffType *dqcoeff = qcoeff + n_coeffs; + uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs); + + // Testing uses 2-D DCT scan order table + const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT); + + // Testing uses luminance quantization table + const int16_t *zbin = qtab_->quant.y_zbin[q]; + + const int16_t *round = nullptr; + const int16_t *quant = nullptr; + if (type_ == TYPE_B) { + round = qtab_->quant.y_round[q]; + quant = qtab_->quant.y_quant[q]; + } else if (type_ == TYPE_FP) { + round = qtab_->quant.y_round_fp[q]; + quant = qtab_->quant.y_quant_fp[q]; + } + + const int16_t *quant_shift = qtab_->quant.y_quant_shift[q]; + const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q]; + + for (int i = 0; i < test_num; ++i) { + if (is_loop) FillCoeffRandom(); + + memset(qcoeff_ref, 0, 5 * n_coeffs * sizeof(*qcoeff_ref)); + + RunQuantizeFunc(coeff_ptr, n_coeffs, zbin, round, quant, quant_shift, + qcoeff, qcoeff_ref, dqcoeff, dqcoeff_ref, dequant, + &eob[0], &eob[1], sc->scan, sc->iscan); + + for (int j = 0; j < n_coeffs; ++j) { + ASSERT_EQ(qcoeff_ref[j], qcoeff[j]) + << "Q mismatch on test: " << i << " at position: " << j + << " Q: " << q << " coeff: " << coeff_ptr[j]; + } + + for (int j = 0; j < n_coeffs; ++j) { + ASSERT_EQ(dqcoeff_ref[j], dqcoeff[j]) + << "Dq mismatch on test: " << i << " at position: " << j + << " Q: " << q << " coeff: " << coeff_ptr[j]; + } + + ASSERT_EQ(eob[0], eob[1]) + << "eobs mismatch on test: " << i << " Q: " << q; + } + } + + void CompareResults(const CoeffType *buf_ref, const CoeffType *buf, int size, + const char *text, int q, int number) { + int i; + for (i = 
0; i < size; ++i) { + ASSERT_EQ(buf_ref[i], buf[i]) << text << " mismatch on test: " << number + << " at position: " << i << " Q: " << q; + } + } + + int coeff_num() const { return av1_get_max_eob(tx_size_); } + + void FillCoeff(CoeffType c) { + const int n_coeffs = coeff_num(); + for (int i = 0; i < n_coeffs; ++i) { + coeff_[i] = c; + } + } + + void FillCoeffRandom() { + const int n_coeffs = coeff_num(); + FillCoeffZero(); + const int num = rnd_.Rand16() % n_coeffs; + // Randomize the first non zero coeff position. + const int start = rnd_.Rand16() % n_coeffs; + const int end = std::min(start + num, n_coeffs); + for (int i = start; i < end; ++i) { + coeff_[i] = GetRandomCoeff(); + } + } + + void FillCoeffRandomRows(int num) { + FillCoeffZero(); + for (int i = 0; i < num; ++i) { + coeff_[i] = GetRandomCoeff(); + } + } + + void FillCoeffZero() { FillCoeff(0); } + + void FillCoeffConstant() { + CoeffType c = GetRandomCoeff(); + FillCoeff(c); + } + + void FillDcOnly() { + FillCoeffZero(); + coeff_[0] = GetRandomCoeff(); + } + + void FillDcLargeNegative() { + FillCoeffZero(); + // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues + // like BUG=883 where the constant being compared was incorrectly + // initialized. 
+ coeff_[0] = -8191; + } + + CoeffType GetRandomCoeff() { + CoeffType coeff; + if (bd_ == AOM_BITS_8) { + coeff = + clamp(static_cast<int16_t>(rnd_.Rand16()), INT16_MIN + 1, INT16_MAX); + } else { + CoeffType min = -(1 << (7 + bd_)); + CoeffType max = -min - 1; + coeff = clamp(static_cast<CoeffType>(rnd_.Rand31()), min, max); + } + return coeff; + } + + ACMRandom rnd_; + QuanTable *qtab_; + CoeffType *coeff_; + FuncType quant_ref_; + FuncType quant_; + TX_SIZE tx_size_; + QuantType type_; + aom_bit_depth_t bd_; +}; + +class FullPrecisionQuantizeTest + : public QuantizeTestBase<tran_low_t, QuantizeFunc> { + void RunQuantizeFunc(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + const int16_t *zbin_ptr, const int16_t *round_ptr, + const int16_t *quant_ptr, const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *qcoeff_ref_ptr, + tran_low_t *dqcoeff_ptr, tran_low_t *dqcoeff_ref_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ref_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) override { + quant_ref_(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, + quant_shift_ptr, qcoeff_ref_ptr, dqcoeff_ref_ptr, dequant_ptr, + eob_ref_ptr, scan, iscan); + + API_REGISTER_STATE_CHECK(quant_( + coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, + qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan)); + } +}; + +class LowPrecisionQuantizeTest + : public QuantizeTestBase<int16_t, LPQuantizeFunc> { + void RunQuantizeFunc(const int16_t *coeff_ptr, intptr_t n_coeffs, + const int16_t * /*zbin_ptr*/, const int16_t *round_ptr, + const int16_t *quant_ptr, + const int16_t * /*quant_shift_ptr*/, int16_t *qcoeff_ptr, + int16_t *qcoeff_ref_ptr, int16_t *dqcoeff_ptr, + int16_t *dqcoeff_ref_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ref_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan) override { + quant_ref_(coeff_ptr, n_coeffs, round_ptr, quant_ptr, qcoeff_ref_ptr, + dqcoeff_ref_ptr, dequant_ptr, 
eob_ref_ptr, scan, iscan); + + API_REGISTER_STATE_CHECK(quant_(coeff_ptr, n_coeffs, round_ptr, quant_ptr, + qcoeff_ptr, dqcoeff_ptr, dequant_ptr, + eob_ptr, scan, iscan)); + } +}; + +TEST_P(FullPrecisionQuantizeTest, ZeroInput) { + FillCoeffZero(); + QuantizeRun(false); +} + +TEST_P(FullPrecisionQuantizeTest, LargeNegativeInput) { + FillDcLargeNegative(); + QuantizeRun(false, 0, 1); +} + +TEST_P(FullPrecisionQuantizeTest, DcOnlyInput) { + FillDcOnly(); + QuantizeRun(false, 0, 1); +} + +TEST_P(FullPrecisionQuantizeTest, RandomInput) { + QuantizeRun(true, 0, kTestNum); +} + +TEST_P(FullPrecisionQuantizeTest, MultipleQ) { + for (int q = 0; q < QINDEX_RANGE; ++q) { + QuantizeRun(true, q, kTestNum); + } +} + +// Force the coeff to be half the value of the dequant. This exposes a +// mismatch found in av1_quantize_fp_sse2(). +TEST_P(FullPrecisionQuantizeTest, CoeffHalfDequant) { + FillCoeff(16); + QuantizeRun(false, 25, 1); +} + +TEST_P(FullPrecisionQuantizeTest, DISABLED_Speed) { + tran_low_t *coeff_ptr = coeff_; + const intptr_t n_coeffs = coeff_num(); + + tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs; + tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs; + + tran_low_t *qcoeff = dqcoeff_ref + n_coeffs; + tran_low_t *dqcoeff = qcoeff + n_coeffs; + uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs); + + // Testing uses 2-D DCT scan order table + const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT); + + // Testing uses luminance quantization table + const int q = 22; + const int16_t *zbin = qtab_->quant.y_zbin[q]; + const int16_t *round_fp = qtab_->quant.y_round_fp[q]; + const int16_t *quant_fp = qtab_->quant.y_quant_fp[q]; + const int16_t *quant_shift = qtab_->quant.y_quant_shift[q]; + const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q]; + const int kNumTests = 5000000; + aom_usec_timer timer, simd_timer; + int rows = tx_size_high[tx_size_]; + int cols = tx_size_wide[tx_size_]; + rows = AOMMIN(32, rows); + cols = AOMMIN(32, cols); + for (int cnt = 0; cnt <= 
rows; cnt++) { + FillCoeffRandomRows(cnt * cols); + + aom_usec_timer_start(&timer); + for (int n = 0; n < kNumTests; ++n) { + quant_ref_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift, + qcoeff, dqcoeff, dequant, eob, sc->scan, sc->iscan); + } + aom_usec_timer_mark(&timer); + + aom_usec_timer_start(&simd_timer); + for (int n = 0; n < kNumTests; ++n) { + quant_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift, qcoeff, + dqcoeff, dequant, eob, sc->scan, sc->iscan); + } + aom_usec_timer_mark(&simd_timer); + + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + const int simd_elapsed_time = + static_cast<int>(aom_usec_timer_elapsed(&simd_timer)); + printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time, + simd_elapsed_time, ((float)elapsed_time / simd_elapsed_time)); + } +} + +// TODO(crbug.com/aomedia/2796) +TEST_P(LowPrecisionQuantizeTest, ZeroInput) { + FillCoeffZero(); + QuantizeRun(false); +} + +TEST_P(LowPrecisionQuantizeTest, LargeNegativeInput) { + FillDcLargeNegative(); + QuantizeRun(false, 0, 1); +} + +TEST_P(LowPrecisionQuantizeTest, DcOnlyInput) { + FillDcOnly(); + QuantizeRun(false, 0, 1); +} + +TEST_P(LowPrecisionQuantizeTest, RandomInput) { + QuantizeRun(true, 0, kTestNum); +} + +TEST_P(LowPrecisionQuantizeTest, MultipleQ) { + for (int q = 0; q < QINDEX_RANGE; ++q) { + QuantizeRun(true, q, kTestNum); + } +} + +// Force the coeff to be half the value of the dequant. This exposes a +// mismatch found in av1_quantize_fp_sse2(). 
+TEST_P(LowPrecisionQuantizeTest, CoeffHalfDequant) { + FillCoeff(16); + QuantizeRun(false, 25, 1); +} + +TEST_P(LowPrecisionQuantizeTest, DISABLED_Speed) { + int16_t *coeff_ptr = coeff_; + const intptr_t n_coeffs = coeff_num(); + + int16_t *qcoeff_ref = coeff_ptr + n_coeffs; + int16_t *dqcoeff_ref = qcoeff_ref + n_coeffs; + + int16_t *qcoeff = dqcoeff_ref + n_coeffs; + int16_t *dqcoeff = qcoeff + n_coeffs; + uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs); + + // Testing uses 2-D DCT scan order table + const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT); + + // Testing uses luminance quantization table + const int q = 22; + const int16_t *round_fp = qtab_->quant.y_round_fp[q]; + const int16_t *quant_fp = qtab_->quant.y_quant_fp[q]; + const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q]; + const int kNumTests = 5000000; + aom_usec_timer timer, simd_timer; + int rows = tx_size_high[tx_size_]; + int cols = tx_size_wide[tx_size_]; + rows = AOMMIN(32, rows); + cols = AOMMIN(32, cols); + for (int cnt = 0; cnt <= rows; cnt++) { + FillCoeffRandomRows(cnt * cols); + + aom_usec_timer_start(&timer); + for (int n = 0; n < kNumTests; ++n) { + quant_ref_(coeff_ptr, n_coeffs, round_fp, quant_fp, qcoeff, dqcoeff, + dequant, eob, sc->scan, sc->iscan); + } + aom_usec_timer_mark(&timer); + + aom_usec_timer_start(&simd_timer); + for (int n = 0; n < kNumTests; ++n) { + quant_(coeff_ptr, n_coeffs, round_fp, quant_fp, qcoeff, dqcoeff, dequant, + eob, sc->scan, sc->iscan); + } + aom_usec_timer_mark(&simd_timer); + + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + const int simd_elapsed_time = + static_cast<int>(aom_usec_timer_elapsed(&simd_timer)); + printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time, + simd_elapsed_time, ((float)elapsed_time / simd_elapsed_time)); + } +} + +using std::make_tuple; + +#if HAVE_AVX2 + +const QuantizeParam<LPQuantizeFunc> kLPQParamArrayAvx2[] = { + make_tuple(&av1_quantize_lp_c, 
&av1_quantize_lp_avx2, + static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2, + static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2, + static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8) +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, LowPrecisionQuantizeTest, + ::testing::ValuesIn(kLPQParamArrayAvx2)); + +const QuantizeParam<QuantizeFunc> kQParamArrayAvx2[] = { + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, + static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, + static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, + static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, + static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, + static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, + static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, + static_cast<TX_SIZE>(TX_16X64), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, + static_cast<TX_SIZE>(TX_64X16), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_avx2, + static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>, + &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, + static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), + make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>, + &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, + static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_10), + make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>, + 
&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, + static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_12), + make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>, + &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>, + static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8), + make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>, + &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>, + static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_10), + make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>, + &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>, + static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_12), + make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>, + &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>, + static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8), + make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>, + &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>, + static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_10), + make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>, + &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>, + static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_12), + make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), + make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10), + make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), + make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_avx2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), + make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_avx2, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_highbd_quantize_b_adaptive_c, + &aom_highbd_quantize_b_adaptive_avx2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, 
AOM_BITS_8), + make_tuple(&aom_highbd_quantize_b_adaptive_c, + &aom_highbd_quantize_b_adaptive_avx2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10), + make_tuple(&aom_highbd_quantize_b_adaptive_c, + &aom_highbd_quantize_b_adaptive_avx2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), + make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, + &aom_highbd_quantize_b_32x32_adaptive_avx2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), + make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, + &aom_highbd_quantize_b_32x32_adaptive_avx2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10), + make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, + &aom_highbd_quantize_b_32x32_adaptive_avx2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), +#endif // !CONFIG_REALTIME_ONLY +#endif // CONFIG_AV1_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2, + static_cast<TX_SIZE>(TX_8X8), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2, + static_cast<TX_SIZE>(TX_4X4), TYPE_B, AOM_BITS_8), +#endif // !CONFIG_REALTIME_ONLY + make_tuple(&aom_quantize_b_c, &aom_quantize_b_avx2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_avx2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_avx2, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8), +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, FullPrecisionQuantizeTest, + ::testing::ValuesIn(kQParamArrayAvx2)); +#endif // HAVE_AVX2 + +#if HAVE_SSE2 + +const QuantizeParam<LPQuantizeFunc> kLPQParamArraySSE2[] = { + make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2, + static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_lp_c, 
&av1_quantize_lp_sse2, + static_cast<TX_SIZE>(TX_8X8), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2, + static_cast<TX_SIZE>(TX_4X4), TYPE_FP, AOM_BITS_8) +}; + +INSTANTIATE_TEST_SUITE_P(SSE2, LowPrecisionQuantizeTest, + ::testing::ValuesIn(kLPQParamArraySSE2)); + +const QuantizeParam<QuantizeFunc> kQParamArraySSE2[] = { + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, + static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, + static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, + static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, + static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, + static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8), + make_tuple(&aom_quantize_b_c, &aom_quantize_b_sse2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), + make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10), + make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_highbd_quantize_b_adaptive_c, + &aom_highbd_quantize_b_adaptive_sse2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), + make_tuple(&aom_highbd_quantize_b_adaptive_c, + &aom_highbd_quantize_b_adaptive_sse2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10), + make_tuple(&aom_highbd_quantize_b_adaptive_c, + &aom_highbd_quantize_b_adaptive_sse2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), + make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, 
AOM_BITS_8), + make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10), + make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), + make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, + &aom_highbd_quantize_b_32x32_adaptive_sse2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), + make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, + &aom_highbd_quantize_b_32x32_adaptive_sse2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10), + make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, + &aom_highbd_quantize_b_32x32_adaptive_sse2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), +#endif // !CONFIG_REALTIME_ONLY + make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8), + make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_10), + make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c, + &aom_highbd_quantize_b_64x64_adaptive_sse2, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8), + make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c, + &aom_highbd_quantize_b_64x64_adaptive_sse2, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_10), + make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c, + &aom_highbd_quantize_b_64x64_adaptive_sse2, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12), +#endif // !CONFIG_REALTIME_ONLY +#endif // CONFIG_AV1_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2, + 
static_cast<TX_SIZE>(TX_8X8), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2, + static_cast<TX_SIZE>(TX_4X4), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_32x32_adaptive_c, + &aom_quantize_b_32x32_adaptive_sse2, + static_cast<TX_SIZE>(TX_32X16), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_32x32_adaptive_c, + &aom_quantize_b_32x32_adaptive_sse2, + static_cast<TX_SIZE>(TX_16X32), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_32x32_adaptive_c, + &aom_quantize_b_32x32_adaptive_sse2, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_64x64_adaptive_c, + &aom_quantize_b_64x64_adaptive_sse2, + static_cast<TX_SIZE>(TX_32X64), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_64x64_adaptive_c, + &aom_quantize_b_64x64_adaptive_sse2, + static_cast<TX_SIZE>(TX_64X32), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_64x64_adaptive_c, + &aom_quantize_b_64x64_adaptive_sse2, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8) +#endif // !CONFIG_REALTIME_ONLY +}; + +INSTANTIATE_TEST_SUITE_P(SSE2, FullPrecisionQuantizeTest, + ::testing::ValuesIn(kQParamArraySSE2)); +#endif + +#if HAVE_NEON + +const QuantizeParam<LPQuantizeFunc> kLPQParamArrayNEON[] = { + make_tuple(av1_quantize_lp_c, av1_quantize_lp_neon, + static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), + make_tuple(av1_quantize_lp_c, av1_quantize_lp_neon, + static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8), + make_tuple(av1_quantize_lp_c, av1_quantize_lp_neon, + static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8) +}; + +INSTANTIATE_TEST_SUITE_P(NEON, LowPrecisionQuantizeTest, + ::testing::ValuesIn(kLPQParamArrayNEON)); + +const QuantizeParam<QuantizeFunc> kQParamArrayNEON[] = { + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon, + static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon, + static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8), + 
make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon, + static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon, + static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon, + static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_neon, + static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8), + make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_neon, + static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8), + make_tuple(&aom_quantize_b_c, &aom_quantize_b_neon, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_neon, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_neon, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8), + +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>, + &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_neon>, + static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_12), + make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>, + &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_neon>, + static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_12), + make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>, + &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_neon>, + static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_12), + make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_neon, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), + make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_neon, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), + make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_neon, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12), +#if !CONFIG_REALTIME_ONLY + make_tuple(&aom_highbd_quantize_b_adaptive_c, + 
&aom_highbd_quantize_b_adaptive_neon, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), + make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, + &aom_highbd_quantize_b_32x32_adaptive_neon, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), + make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c, + &aom_highbd_quantize_b_64x64_adaptive_neon, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12), +#endif // !CONFIG_REALTIME_ONLY +#endif // CONFIG_AV1_HIGHBITDEPTH +}; + +INSTANTIATE_TEST_SUITE_P(NEON, FullPrecisionQuantizeTest, + ::testing::ValuesIn(kQParamArrayNEON)); +#endif + +#if HAVE_SSSE3 && AOM_ARCH_X86_64 +INSTANTIATE_TEST_SUITE_P( + SSSE3, FullPrecisionQuantizeTest, + ::testing::Values( + make_tuple(&aom_quantize_b_c, &aom_quantize_b_ssse3, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_ssse3, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_ssse3, + static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8))); + +#endif // HAVE_SSSE3 && AOM_ARCH_X86_64 + +#if HAVE_AVX +INSTANTIATE_TEST_SUITE_P( + AVX, FullPrecisionQuantizeTest, + ::testing::Values( + make_tuple(&aom_quantize_b_c, &aom_quantize_b_avx, + static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), + make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_avx, + static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8))); + +#endif // HAVE_AVX + +} // namespace diff --git a/third_party/aom/test/ratectrl_rtc_test.cc b/third_party/aom/test/ratectrl_rtc_test.cc new file mode 100644 index 0000000000..cc054b6926 --- /dev/null +++ b/third_party/aom/test/ratectrl_rtc_test.cc @@ -0,0 +1,505 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "av1/ratectrl_rtc.h" + +#include <memory> + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/i420_video_source.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +constexpr size_t kNumFrames = 450; + +const int kTemporalId3Layer[4] = { 0, 2, 1, 2 }; +const int kTemporalId2Layer[2] = { 0, 1 }; +const int kTemporalRateAllocation3Layer[3] = { 50, 70, 100 }; +const int kTemporalRateAllocation2Layer[2] = { 60, 100 }; +const int kSpatialLayerBitrate[3] = { 200, 500, 900 }; + +// Parameter: aq mode: 0 and 3 +class RcInterfaceTest : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWithParam<int> { + public: + RcInterfaceTest() + : EncoderTest(GET_PARAM(0)), aq_mode_(GET_PARAM(1)), key_interval_(3000), + encoder_exit_(false), layer_frame_cnt_(0), superframe_cnt_(0), + frame_cnt_(0), dynamic_temporal_layers_(false), + dynamic_spatial_layers_(false), num_drops_(0), max_consec_drop_(0), + frame_drop_thresh_(0) { + memset(&svc_params_, 0, sizeof(svc_params_)); + memset(&layer_id_, 0, sizeof(layer_id_)); + } + + ~RcInterfaceTest() override = default; + + protected: + void SetUp() override { InitializeConfig(::libaom_test::kRealTime); } + + int GetNumSpatialLayers() override { return rc_cfg_.ss_number_layers; } + + void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) override { + int key_int = key_interval_; + const int use_svc = + rc_cfg_.ss_number_layers > 1 || rc_cfg_.ts_number_layers > 1; + encoder->Control(AV1E_SET_RTC_EXTERNAL_RC, 1); + if (video->frame() == 0 && layer_frame_cnt_ 
== 0) { + encoder->Control(AOME_SET_CPUUSED, 7); + encoder->Control(AV1E_SET_AQ_MODE, aq_mode_); + if (rc_cfg_.is_screen) { + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN); + } else { + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT); + } + encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT, + rc_cfg_.max_intra_bitrate_pct); + if (use_svc) encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_); + encoder->Control(AV1E_SET_MAX_CONSEC_FRAME_DROP_CBR, max_consec_drop_); + } + // SVC specific settings + if (use_svc) { + frame_params_.spatial_layer_id = + layer_frame_cnt_ % rc_cfg_.ss_number_layers; + if (rc_cfg_.ts_number_layers == 3) + frame_params_.temporal_layer_id = + kTemporalId3Layer[superframe_cnt_ % 4]; + else if (rc_cfg_.ts_number_layers == 2) + frame_params_.temporal_layer_id = + kTemporalId2Layer[superframe_cnt_ % 2]; + else + frame_params_.temporal_layer_id = 0; + layer_id_.spatial_layer_id = frame_params_.spatial_layer_id; + layer_id_.temporal_layer_id = frame_params_.temporal_layer_id; + encoder->Control(AV1E_SET_SVC_LAYER_ID, &layer_id_); + key_int = key_interval_ * rc_cfg_.ss_number_layers; + } + frame_params_.frame_type = + layer_frame_cnt_ % key_int == 0 ? aom::kKeyFrame : aom::kInterFrame; + encoder_exit_ = video->frame() == kNumFrames; + frame_flags_ = 0; + + if (dynamic_temporal_layers_) { + if (superframe_cnt_ == 100 && layer_id_.spatial_layer_id == 0) { + // Go down to 2 temporal layers. + SetConfigSvc(3, 2); + encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_); + ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_)); + } else if (superframe_cnt_ == 200 && layer_id_.spatial_layer_id == 0) { + // Go down to 1 temporal layer. + SetConfigSvc(3, 1); + encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_); + ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_)); + } else if (superframe_cnt_ == 300 && layer_id_.spatial_layer_id == 0) { + // Go back up to 3 temporal layers. 
+ SetConfigSvc(3, 3); + encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_); + ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_)); + } + } else if (dynamic_spatial_layers_) { + // In this example the #spatial layers is modified on the fly, + // so we go from (120p,240p,480p) to (240p,480p), etc. + if (superframe_cnt_ == 100 && layer_id_.spatial_layer_id == 0) { + // Change to 2 spatial layers (240p, 480p). + SetConfigSvc(2, 3); + encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_); + ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_)); + } else if (superframe_cnt_ == 200 && layer_id_.spatial_layer_id == 0) { + // Change to 1 spatial layer (480p). + SetConfigSvc(1, 3); + encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_); + ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_)); + } else if (superframe_cnt_ == 300 && layer_id_.spatial_layer_id == 0) { + // Go back to 3 spatial layers (120p, 240p, 480p). + SetConfigSvc(3, 3); + encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_); + // In the fixed SVC mode (which is what is used in this test): + // Key frame is required here on SL0 since 120p will try to predict + // from LAST which was the 480p, so decoder will throw an error + // (reference must be smaller than 4x4). In the flexible mode + // (not used here) we can set the frame flags to predict off the 2x2 + // reference instead, + frame_flags_ = AOM_EFLAG_FORCE_KF; + frame_params_.frame_type = aom::kKeyFrame; + ASSERT_TRUE(rc_api_->UpdateRateControl(rc_cfg_)); + } + } + // TODO(marpan): Add dynamic spatial layers based on 0 layer bitrate. + // That is actual usage in SW where configuration (#spatial, #temporal) + // layers is fixed, but top layer is dropped or re-enabled based on + // bitrate. This requires external RC to handle dropped (zero-size) frames. 
+ } + + void PostEncodeFrameHook(::libaom_test::Encoder *encoder) override { + if (encoder_exit_) { + return; + } + layer_frame_cnt_++; + frame_cnt_++; + if (layer_id_.spatial_layer_id == rc_cfg_.ss_number_layers - 1) + superframe_cnt_++; + int qp; + encoder->Control(AOME_GET_LAST_QUANTIZER, &qp); + if (rc_api_->ComputeQP(frame_params_) == aom::FrameDropDecision::kOk) { + ASSERT_EQ(rc_api_->GetQP(), qp) << "at frame " << frame_cnt_ - 1; + int encoder_lpf_level; + encoder->Control(AOME_GET_LOOPFILTER_LEVEL, &encoder_lpf_level); + aom::AV1LoopfilterLevel loopfilter_level = rc_api_->GetLoopfilterLevel(); + ASSERT_EQ(loopfilter_level.filter_level[0], encoder_lpf_level); + aom::AV1CdefInfo cdef_level = rc_api_->GetCdefInfo(); + int cdef_y_strengths[16]; + encoder->Control(AV1E_GET_LUMA_CDEF_STRENGTH, cdef_y_strengths); + ASSERT_EQ(cdef_level.cdef_strength_y, cdef_y_strengths[0]); + } else { + num_drops_++; + } + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + if (layer_id_.spatial_layer_id == 0) + rc_api_->PostEncodeUpdate(pkt->data.frame.sz - 2); + else + rc_api_->PostEncodeUpdate(pkt->data.frame.sz); + } + + void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) override { + (void)img1; + (void)img2; + } + + void RunOneLayer() { + key_interval_ = 10000; + SetConfig(); + rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_); + frame_params_.spatial_layer_id = 0; + frame_params_.temporal_layer_id = 0; + + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, kNumFrames); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + void RunOneLayerScreen() { + key_interval_ = 10000; + SetConfig(); + rc_cfg_.is_screen = true; + rc_cfg_.width = 352; + rc_cfg_.height = 288; + rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_); + frame_params_.spatial_layer_id = 0; + frame_params_.temporal_layer_id = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 140); + + 
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + void RunOneLayerDropFramesCBR() { + key_interval_ = 10000; + max_consec_drop_ = 8; + frame_drop_thresh_ = 30; + SetConfig(); + rc_cfg_.target_bandwidth = 100; + cfg_.rc_target_bitrate = 100; + rc_cfg_.max_quantizer = 50; + cfg_.rc_max_quantizer = 50; + rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_); + frame_params_.spatial_layer_id = 0; + frame_params_.temporal_layer_id = 0; + + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, kNumFrames); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // Check that some frames were dropped, otherwise test has no value. + ASSERT_GE(num_drops_, 1); + } + + void RunOneLayerPeriodicKey() { + key_interval_ = 100; + SetConfig(); + rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_); + frame_params_.spatial_layer_id = 0; + frame_params_.temporal_layer_id = 0; + + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, kNumFrames); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + void RunSvc() { + key_interval_ = 10000; + SetConfigSvc(3, 3); + rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_); + frame_params_.spatial_layer_id = 0; + frame_params_.temporal_layer_id = 0; + + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, kNumFrames); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + void RunSvcPeriodicKey() { + key_interval_ = 100; + SetConfigSvc(3, 3); + rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_); + frame_params_.spatial_layer_id = 0; + frame_params_.temporal_layer_id = 0; + + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, kNumFrames); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + void RunSvcDynamicTemporal() { + dynamic_temporal_layers_ = true; + key_interval_ = 10000; + SetConfigSvc(3, 3); + rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_); + frame_params_.spatial_layer_id = 0; + frame_params_.temporal_layer_id = 0; + + 
::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, kNumFrames); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + void RunSvcDynamicSpatial() { + dynamic_spatial_layers_ = true; + key_interval_ = 10000; + SetConfigSvc(3, 3); + rc_api_ = aom::AV1RateControlRTC::Create(rc_cfg_); + frame_params_.spatial_layer_id = 0; + frame_params_.temporal_layer_id = 0; + + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, kNumFrames); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + private: + void SetConfig() { + rc_cfg_.width = 640; + rc_cfg_.height = 480; + rc_cfg_.max_quantizer = 52; + rc_cfg_.min_quantizer = 2; + rc_cfg_.target_bandwidth = 1000; + rc_cfg_.buf_initial_sz = 600; + rc_cfg_.buf_optimal_sz = 600; + rc_cfg_.buf_sz = 1000; + rc_cfg_.undershoot_pct = 50; + rc_cfg_.overshoot_pct = 50; + rc_cfg_.max_intra_bitrate_pct = 1000; + rc_cfg_.framerate = 30.0; + rc_cfg_.ss_number_layers = 1; + rc_cfg_.ts_number_layers = 1; + rc_cfg_.scaling_factor_num[0] = 1; + rc_cfg_.scaling_factor_den[0] = 1; + rc_cfg_.layer_target_bitrate[0] = 1000; + rc_cfg_.max_quantizers[0] = 52; + rc_cfg_.min_quantizers[0] = 2; + rc_cfg_.aq_mode = aq_mode_; + rc_cfg_.frame_drop_thresh = frame_drop_thresh_; + rc_cfg_.max_consec_drop = max_consec_drop_; + + // Encoder settings for ground truth. 
+ cfg_.g_w = 640; + cfg_.g_h = 480; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_buf_initial_sz = 600; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 52; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + cfg_.rc_target_bitrate = 1000; + cfg_.kf_min_dist = key_interval_; + cfg_.kf_max_dist = key_interval_; + cfg_.rc_dropframe_thresh = frame_drop_thresh_; + } + + void SetConfigSvc(int number_spatial_layers, int number_temporal_layers) { + rc_cfg_.width = 640; + rc_cfg_.height = 480; + rc_cfg_.max_quantizer = 56; + rc_cfg_.min_quantizer = 2; + rc_cfg_.buf_initial_sz = 600; + rc_cfg_.buf_optimal_sz = 600; + rc_cfg_.buf_sz = 1000; + rc_cfg_.undershoot_pct = 50; + rc_cfg_.overshoot_pct = 50; + rc_cfg_.max_intra_bitrate_pct = 1000; + rc_cfg_.framerate = 30.0; + rc_cfg_.aq_mode = aq_mode_; + rc_cfg_.ss_number_layers = number_spatial_layers; + rc_cfg_.ts_number_layers = number_temporal_layers; + + // Encoder settings for ground truth. + cfg_.g_w = 640; + cfg_.g_h = 480; + cfg_.rc_max_quantizer = 56; + cfg_.rc_min_quantizer = 2; + cfg_.rc_buf_initial_sz = 600; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_undershoot_pct = 50; + cfg_.g_threads = 1; + cfg_.kf_min_dist = key_interval_; + cfg_.kf_max_dist = key_interval_; + cfg_.g_timebase.num = 1; + cfg_.g_timebase.den = 30; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + svc_params_.number_spatial_layers = number_spatial_layers; + svc_params_.number_temporal_layers = number_temporal_layers; + + // Scale factors. 
+ if (number_spatial_layers == 3) { + rc_cfg_.scaling_factor_num[0] = 1; + rc_cfg_.scaling_factor_den[0] = 4; + rc_cfg_.scaling_factor_num[1] = 2; + rc_cfg_.scaling_factor_den[1] = 4; + rc_cfg_.scaling_factor_num[2] = 4; + rc_cfg_.scaling_factor_den[2] = 4; + svc_params_.scaling_factor_num[0] = 1; + svc_params_.scaling_factor_den[0] = 4; + svc_params_.scaling_factor_num[1] = 2; + svc_params_.scaling_factor_den[1] = 4; + svc_params_.scaling_factor_num[2] = 4; + svc_params_.scaling_factor_den[2] = 4; + } else if (number_spatial_layers == 2) { + rc_cfg_.scaling_factor_num[0] = 1; + rc_cfg_.scaling_factor_den[0] = 2; + rc_cfg_.scaling_factor_num[1] = 2; + rc_cfg_.scaling_factor_den[1] = 2; + svc_params_.scaling_factor_num[0] = 1; + svc_params_.scaling_factor_den[0] = 2; + svc_params_.scaling_factor_num[1] = 2; + svc_params_.scaling_factor_den[1] = 2; + } else if (number_spatial_layers == 1) { + rc_cfg_.scaling_factor_num[0] = 1; + rc_cfg_.scaling_factor_den[0] = 1; + svc_params_.scaling_factor_num[0] = 1; + svc_params_.scaling_factor_den[0] = 1; + } + + // TS rate decimator. + if (number_temporal_layers == 3) { + rc_cfg_.ts_rate_decimator[0] = 4; + rc_cfg_.ts_rate_decimator[1] = 2; + rc_cfg_.ts_rate_decimator[2] = 1; + svc_params_.framerate_factor[0] = 4; + svc_params_.framerate_factor[1] = 2; + svc_params_.framerate_factor[2] = 1; + } else if (number_temporal_layers == 2) { + rc_cfg_.ts_rate_decimator[0] = 2; + rc_cfg_.ts_rate_decimator[1] = 1; + svc_params_.framerate_factor[0] = 2; + svc_params_.framerate_factor[1] = 1; + } else if (number_temporal_layers == 1) { + rc_cfg_.ts_rate_decimator[0] = 1; + svc_params_.framerate_factor[0] = 1; + } + + // Bitate. 
+ rc_cfg_.target_bandwidth = 0; + cfg_.rc_target_bitrate = 0; + for (int sl = 0; sl < number_spatial_layers; sl++) { + int spatial_bitrate = 0; + if (number_spatial_layers <= 3) + spatial_bitrate = kSpatialLayerBitrate[sl]; + for (int tl = 0; tl < number_temporal_layers; tl++) { + int layer = sl * number_temporal_layers + tl; + if (number_temporal_layers == 3) { + rc_cfg_.layer_target_bitrate[layer] = + kTemporalRateAllocation3Layer[tl] * spatial_bitrate / 100; + svc_params_.layer_target_bitrate[layer] = + kTemporalRateAllocation3Layer[tl] * spatial_bitrate / 100; + } else if (number_temporal_layers == 2) { + rc_cfg_.layer_target_bitrate[layer] = + kTemporalRateAllocation2Layer[tl] * spatial_bitrate / 100; + svc_params_.layer_target_bitrate[layer] = + kTemporalRateAllocation2Layer[tl] * spatial_bitrate / 100; + } else if (number_temporal_layers == 1) { + rc_cfg_.layer_target_bitrate[layer] = spatial_bitrate; + svc_params_.layer_target_bitrate[layer] = spatial_bitrate; + } + } + rc_cfg_.target_bandwidth += spatial_bitrate; + cfg_.rc_target_bitrate += spatial_bitrate; + } + + // Layer min/max quantizer. 
+ for (int sl = 0; sl < number_spatial_layers; ++sl) { + for (int tl = 0; tl < number_temporal_layers; ++tl) { + const int i = sl * number_temporal_layers + tl; + rc_cfg_.max_quantizers[i] = rc_cfg_.max_quantizer; + rc_cfg_.min_quantizers[i] = rc_cfg_.min_quantizer; + svc_params_.max_quantizers[i] = cfg_.rc_max_quantizer; + svc_params_.min_quantizers[i] = cfg_.rc_min_quantizer; + } + } + } + + std::unique_ptr<aom::AV1RateControlRTC> rc_api_; + aom::AV1RateControlRtcConfig rc_cfg_; + int aq_mode_; + int key_interval_; + aom::AV1FrameParamsRTC frame_params_; + bool encoder_exit_; + aom_svc_params_t svc_params_; + aom_svc_layer_id_t layer_id_; + int layer_frame_cnt_; + int superframe_cnt_; + int frame_cnt_; + bool dynamic_temporal_layers_; + bool dynamic_spatial_layers_; + int num_drops_; + int max_consec_drop_; + int frame_drop_thresh_; +}; + +TEST_P(RcInterfaceTest, OneLayer) { RunOneLayer(); } + +TEST_P(RcInterfaceTest, OneLayerDropFramesCBR) { RunOneLayerDropFramesCBR(); } + +TEST_P(RcInterfaceTest, OneLayerPeriodicKey) { RunOneLayerPeriodicKey(); } + +TEST_P(RcInterfaceTest, OneLayerScreen) { RunOneLayerScreen(); } + +TEST_P(RcInterfaceTest, Svc) { RunSvc(); } + +TEST_P(RcInterfaceTest, SvcPeriodicKey) { RunSvcPeriodicKey(); } + +TEST_P(RcInterfaceTest, SvcDynamicTemporal) { RunSvcDynamicTemporal(); } + +TEST_P(RcInterfaceTest, SvcDynamicSpatial) { RunSvcDynamicSpatial(); } + +AV1_INSTANTIATE_TEST_SUITE(RcInterfaceTest, ::testing::Values(0, 3)); + +} // namespace diff --git a/third_party/aom/test/ratectrl_test.cc b/third_party/aom/test/ratectrl_test.cc new file mode 100644 index 0000000000..d951b1197f --- /dev/null +++ b/third_party/aom/test/ratectrl_test.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "av1/encoder/firstpass.h" +#include "av1/encoder/ratectrl.h" +#include "av1/encoder/tpl_model.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +TEST(RatectrlTest, QModeGetQIndexTest) { + int base_q_index = 36; + int gf_update_type = INTNL_ARF_UPDATE; + int gf_pyramid_level = 1; + int arf_q = 100; + int q_index = av1_q_mode_get_q_index(base_q_index, gf_update_type, + gf_pyramid_level, arf_q); + EXPECT_EQ(q_index, arf_q); + + gf_update_type = INTNL_ARF_UPDATE; + gf_pyramid_level = 3; + q_index = av1_q_mode_get_q_index(base_q_index, gf_update_type, + gf_pyramid_level, arf_q); + EXPECT_LT(q_index, arf_q); + + gf_update_type = LF_UPDATE; + q_index = av1_q_mode_get_q_index(base_q_index, gf_update_type, + gf_pyramid_level, arf_q); + EXPECT_EQ(q_index, base_q_index); +} +} // namespace diff --git a/third_party/aom/test/rd_test.cc b/third_party/aom/test/rd_test.cc new file mode 100644 index 0000000000..0c481fcbb6 --- /dev/null +++ b/third_party/aom/test/rd_test.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <math.h> +#include <vector> + +#include "av1/common/quant_common.h" +#include "av1/encoder/rd.h" +#include "aom/aom_codec.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +TEST(RdTest, GetDeltaqOffsetValueTest1) { + aom_bit_depth_t bit_depth = AOM_BITS_8; + double beta = 4; + int q_index = 29; + int dc_q_step = + av1_dc_quant_QTX(q_index, 0, static_cast<aom_bit_depth_t>(bit_depth)); + EXPECT_EQ(dc_q_step, 32); + + int ref_new_dc_q_step = static_cast<int>(round(dc_q_step / sqrt(beta))); + EXPECT_EQ(ref_new_dc_q_step, 16); + + int delta_q = av1_get_deltaq_offset(bit_depth, q_index, beta); + int new_dc_q_step = av1_dc_quant_QTX(q_index, delta_q, + static_cast<aom_bit_depth_t>(bit_depth)); + + EXPECT_EQ(new_dc_q_step, ref_new_dc_q_step); +} + +TEST(RdTest, GetDeltaqOffsetValueTest2) { + aom_bit_depth_t bit_depth = AOM_BITS_8; + double beta = 1.0 / 4.0; + int q_index = 29; + int dc_q_step = + av1_dc_quant_QTX(q_index, 0, static_cast<aom_bit_depth_t>(bit_depth)); + EXPECT_EQ(dc_q_step, 32); + + int ref_new_dc_q_step = static_cast<int>(round(dc_q_step / sqrt(beta))); + EXPECT_EQ(ref_new_dc_q_step, 64); + + int delta_q = av1_get_deltaq_offset(bit_depth, q_index, beta); + int new_dc_q_step = av1_dc_quant_QTX(q_index, delta_q, + static_cast<aom_bit_depth_t>(bit_depth)); + + EXPECT_EQ(new_dc_q_step, ref_new_dc_q_step); +} + +TEST(RdTest, GetDeltaqOffsetBoundaryTest1) { + aom_bit_depth_t bit_depth = AOM_BITS_8; + double beta = 0.000000001; + std::vector<int> q_index_ls = { 254, 255 }; + for (auto q_index : q_index_ls) { + int delta_q = av1_get_deltaq_offset(bit_depth, q_index, beta); + EXPECT_EQ(q_index + delta_q, 255); + } +} + +TEST(RdTest, GetDeltaqOffsetBoundaryTest2) { + aom_bit_depth_t bit_depth = AOM_BITS_8; + double beta = 100; + std::vector<int> q_index_ls = { 1, 0 }; + for (auto q_index : q_index_ls) { + int delta_q = av1_get_deltaq_offset(bit_depth, q_index, beta); + EXPECT_EQ(q_index + delta_q, 0); + } +} 
+ +TEST(RdTest, GetDeltaqOffsetUnitaryTest1) { + aom_bit_depth_t bit_depth = AOM_BITS_8; + double beta = 1; + for (int q_index = 0; q_index < 255; ++q_index) { + int delta_q = av1_get_deltaq_offset(bit_depth, q_index, beta); + EXPECT_EQ(delta_q, 0); + } +} + +} // namespace diff --git a/third_party/aom/test/reconinter_test.cc b/third_party/aom/test/reconinter_test.cc new file mode 100644 index 0000000000..ee1a9893db --- /dev/null +++ b/third_party/aom/test/reconinter_test.cc @@ -0,0 +1,372 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <tuple> + +#include "config/aom_config.h" +#include "config/av1_rtcd.h" + +#include "aom_ports/mem.h" +#include "av1/common/scan.h" +#include "av1/common/txb_common.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { +using libaom_test::ACMRandom; + +using BuildCompDiffWtdMaskFunc = void (*)(uint8_t *mask, + DIFFWTD_MASK_TYPE mask_type, + const uint8_t *src0, int src0_stride, + const uint8_t *src1, int src1_stride, + int h, int w); + +using BuildCompDiffwtdMaskDParam = + std::tuple<BLOCK_SIZE, BuildCompDiffWtdMaskFunc>; + +#if HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON +::testing::internal::ParamGenerator<BuildCompDiffwtdMaskDParam> BuildParams( + BuildCompDiffWtdMaskFunc filter) { + return ::testing::Combine(::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL), + ::testing::Values(filter)); +} +#endif + +class BuildCompDiffwtdMaskTest + : public ::testing::TestWithParam<BuildCompDiffwtdMaskDParam> { + public: + BuildCompDiffwtdMaskTest() : rnd_(ACMRandom::DeterministicSeed()) {} + ~BuildCompDiffwtdMaskTest() override = default; + + protected: + void RunTest(BuildCompDiffWtdMaskFunc test_impl, bool is_speed, + const DIFFWTD_MASK_TYPE type) { + const int sb_type = GET_PARAM(0); + const int width = block_size_wide[sb_type]; + const int height = block_size_high[sb_type]; + DECLARE_ALIGNED(16, uint8_t, mask_ref[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, mask_test[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, src0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, src1[MAX_SB_SQUARE]); + for (int i = 0; i < width * height; i++) { + src0[i] = rnd_.Rand8(); + src1[i] = rnd_.Rand8(); + } + const int run_times = is_speed ? 
(10000000 / (width + height)) : 1; + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + av1_build_compound_diffwtd_mask_c(mask_ref, type, src0, width, src1, + width, height, width); + } + const double t1 = get_time_mark(&timer); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + test_impl(mask_test, type, src0, width, src1, width, height, width); + } + const double t2 = get_time_mark(&timer); + if (is_speed) { + printf("mask %d %3dx%-3d:%7.2f/%7.2fns", type, width, height, t1, t2); + printf("(%3.2f)\n", t1 / t2); + } + for (int r = 0; r < height; ++r) { + for (int c = 0; c < width; ++c) { + ASSERT_EQ(mask_ref[c + r * width], mask_test[c + r * width]) + << "[" << r << "," << c << "] " << run_times << " @ " << width + << "x" << height << " inv " << type; + } + } + } + + private: + ACMRandom rnd_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BuildCompDiffwtdMaskTest); + +TEST_P(BuildCompDiffwtdMaskTest, match) { + RunTest(GET_PARAM(1), 0, DIFFWTD_38); + RunTest(GET_PARAM(1), 0, DIFFWTD_38_INV); +} +TEST_P(BuildCompDiffwtdMaskTest, DISABLED_Speed) { + RunTest(GET_PARAM(1), 1, DIFFWTD_38); + RunTest(GET_PARAM(1), 1, DIFFWTD_38_INV); +} + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P(SSE4_1, BuildCompDiffwtdMaskTest, + BuildParams(av1_build_compound_diffwtd_mask_sse4_1)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, BuildCompDiffwtdMaskTest, + BuildParams(av1_build_compound_diffwtd_mask_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, BuildCompDiffwtdMaskTest, + BuildParams(av1_build_compound_diffwtd_mask_neon)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH + +using BuildCompDiffWtdMaskHighbdFunc = + void (*)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, + int src0_stride, const uint8_t *src1, int src1_stride, int h, + int w, int bd); + +using BuildCompDiffwtdMaskHighbdParam = + std::tuple<BLOCK_SIZE, int, BuildCompDiffWtdMaskHighbdFunc>; + +#if HAVE_SSSE3 || 
HAVE_AVX2 || HAVE_NEON +::testing::internal::ParamGenerator<BuildCompDiffwtdMaskHighbdParam> +BuildParamsHighbd(BuildCompDiffWtdMaskHighbdFunc filter) { + return ::testing::Combine(::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL), + ::testing::Values(8, 10, 12), + ::testing::Values(filter)); +} +#endif + +class BuildCompDiffwtdMaskHighbdTest + : public ::testing::TestWithParam<BuildCompDiffwtdMaskHighbdParam> { + public: + BuildCompDiffwtdMaskHighbdTest() : rnd_(ACMRandom::DeterministicSeed()) {} + ~BuildCompDiffwtdMaskHighbdTest() override = default; + + protected: + void RunTest(BuildCompDiffWtdMaskHighbdFunc test_impl, bool is_speed, + const DIFFWTD_MASK_TYPE type) { + const int sb_type = GET_PARAM(0); + const int bd = GET_PARAM(1); + const int width = block_size_wide[sb_type]; + const int height = block_size_high[sb_type]; + const int mask = (1 << bd) - 1; + DECLARE_ALIGNED(16, uint8_t, mask_ref[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, mask_test[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint16_t, src0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint16_t, src1[MAX_SB_SQUARE]); + for (int i = 0; i < width * height; i++) { + src0[i] = rnd_.Rand16() & mask; + src1[i] = rnd_.Rand16() & mask; + } + const int run_times = is_speed ? 
(10000000 / (width + height)) : 1; + aom_usec_timer timer; + + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + uint8_t *src0_8 = CONVERT_TO_BYTEPTR(src0); + uint8_t *src1_8 = CONVERT_TO_BYTEPTR(src1); + av1_build_compound_diffwtd_mask_highbd_c( + mask_ref, type, src0_8, width, src1_8, width, height, width, bd); + } + const double t1 = get_time_mark(&timer); + + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + uint8_t *src0_8 = CONVERT_TO_BYTEPTR(src0); + uint8_t *src1_8 = CONVERT_TO_BYTEPTR(src1); + test_impl(mask_test, type, src0_8, width, src1_8, width, height, width, + bd); + } + const double t2 = get_time_mark(&timer); + + if (is_speed) { + printf("mask %d %3dx%-3d:%7.2f/%7.2fns", type, width, height, t1, t2); + printf("(%3.2f)\n", t1 / t2); + } + for (int r = 0; r < height; ++r) { + for (int c = 0; c < width; ++c) { + ASSERT_EQ(mask_ref[c + r * width], mask_test[c + r * width]) + << "[" << r << "," << c << "] " << run_times << " @ " << width + << "x" << height << " inv " << type; + } + } + } + + private: + ACMRandom rnd_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BuildCompDiffwtdMaskHighbdTest); + +TEST_P(BuildCompDiffwtdMaskHighbdTest, match) { + RunTest(GET_PARAM(2), 0, DIFFWTD_38); + RunTest(GET_PARAM(2), 0, DIFFWTD_38_INV); +} +TEST_P(BuildCompDiffwtdMaskHighbdTest, DISABLED_Speed) { + RunTest(GET_PARAM(2), 1, DIFFWTD_38); + RunTest(GET_PARAM(2), 1, DIFFWTD_38_INV); +} + +#if HAVE_SSSE3 +INSTANTIATE_TEST_SUITE_P( + SSSE3, BuildCompDiffwtdMaskHighbdTest, + BuildParamsHighbd(av1_build_compound_diffwtd_mask_highbd_ssse3)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, BuildCompDiffwtdMaskHighbdTest, + BuildParamsHighbd(av1_build_compound_diffwtd_mask_highbd_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, BuildCompDiffwtdMaskHighbdTest, + BuildParamsHighbd(av1_build_compound_diffwtd_mask_highbd_neon)); +#endif +#endif // CONFIG_AV1_HIGHBITDEPTH + +using 
BuildCompDiffWtdMaskD16Func = void (*)( + uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, + int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, + ConvolveParams *conv_params, int bd); + +using BuildCompDiffwtdMaskD16Param = + std::tuple<int, BuildCompDiffWtdMaskD16Func, BLOCK_SIZE>; + +#if HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON +::testing::internal::ParamGenerator<BuildCompDiffwtdMaskD16Param> BuildParams( + BuildCompDiffWtdMaskD16Func filter) { + return ::testing::Combine(::testing::Range(8, 13, 2), + ::testing::Values(filter), + ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL)); +} +#endif + +class BuildCompDiffwtdMaskD16Test + : public ::testing::TestWithParam<BuildCompDiffwtdMaskD16Param> { + public: + BuildCompDiffwtdMaskD16Test() : rnd_(ACMRandom::DeterministicSeed()) {} + ~BuildCompDiffwtdMaskD16Test() override = default; + + protected: + void RunCheckOutput(BuildCompDiffWtdMaskD16Func test_impl) { + const int block_idx = GET_PARAM(2); + const int bd = GET_PARAM(0); + const int width = block_size_wide[block_idx]; + const int height = block_size_high[block_idx]; + DECLARE_ALIGNED(16, uint8_t, mask_ref[2 * MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, mask_test[2 * MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint16_t, src0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint16_t, src1[MAX_SB_SQUARE]); + + ConvolveParams conv_params = + get_conv_params_no_round(0, 0, nullptr, 0, 1, bd); + + const int in_precision = + bd + 2 * FILTER_BITS - conv_params.round_0 - conv_params.round_1 + 2; + + for (int i = 0; i < MAX_SB_SQUARE; i++) { + src0[i] = rnd_.Rand16() & ((1 << in_precision) - 1); + src1[i] = rnd_.Rand16() & ((1 << in_precision) - 1); + } + + for (int mask_type = 0; mask_type < DIFFWTD_MASK_TYPES; mask_type++) { + av1_build_compound_diffwtd_mask_d16_c( + mask_ref, (DIFFWTD_MASK_TYPE)mask_type, src0, width, src1, width, + height, width, &conv_params, bd); + + test_impl(mask_test, (DIFFWTD_MASK_TYPE)mask_type, src0, width, src1, + 
width, height, width, &conv_params, bd); + + for (int r = 0; r < height; ++r) { + for (int c = 0; c < width; ++c) { + ASSERT_EQ(mask_ref[c + r * width], mask_test[c + r * width]) + << "Mismatch at unit tests for BuildCompDiffwtdMaskD16Test\n" + << " Pixel mismatch at index " + << "[" << r << "," << c << "] " + << " @ " << width << "x" << height << " inv " << mask_type; + } + } + } + } + + void RunSpeedTest(BuildCompDiffWtdMaskD16Func test_impl, + DIFFWTD_MASK_TYPE mask_type) { + const int block_idx = GET_PARAM(2); + const int bd = GET_PARAM(0); + const int width = block_size_wide[block_idx]; + const int height = block_size_high[block_idx]; + DECLARE_ALIGNED(16, uint8_t, mask[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint16_t, src0[MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, uint16_t, src1[MAX_SB_SQUARE]); + + ConvolveParams conv_params = + get_conv_params_no_round(0, 0, nullptr, 0, 1, bd); + + const int in_precision = + bd + 2 * FILTER_BITS - conv_params.round_0 - conv_params.round_1 + 2; + + for (int i = 0; i < MAX_SB_SQUARE; i++) { + src0[i] = rnd_.Rand16() & ((1 << in_precision) - 1); + src1[i] = rnd_.Rand16() & ((1 << in_precision) - 1); + } + + const int num_loops = 10000000 / (width + height); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + + for (int i = 0; i < num_loops; ++i) + av1_build_compound_diffwtd_mask_d16_c(mask, mask_type, src0, width, src1, + width, height, width, &conv_params, + bd); + + aom_usec_timer_mark(&timer); + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + + aom_usec_timer timer1; + aom_usec_timer_start(&timer1); + + for (int i = 0; i < num_loops; ++i) + test_impl(mask, mask_type, src0, width, src1, width, height, width, + &conv_params, bd); + + aom_usec_timer_mark(&timer1); + const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); + printf("av1_build_compound_diffwtd_mask_d16 %3dx%-3d: %7.2f \n", width, + height, elapsed_time / double(elapsed_time1)); + } + + private: + ACMRandom rnd_; 
+}; // class BuildCompDiffwtdMaskD16Test +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BuildCompDiffwtdMaskD16Test); + +TEST_P(BuildCompDiffwtdMaskD16Test, CheckOutput) { + RunCheckOutput(GET_PARAM(1)); +} + +TEST_P(BuildCompDiffwtdMaskD16Test, DISABLED_Speed) { + RunSpeedTest(GET_PARAM(1), DIFFWTD_38); + RunSpeedTest(GET_PARAM(1), DIFFWTD_38_INV); +} + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, BuildCompDiffwtdMaskD16Test, + BuildParams(av1_build_compound_diffwtd_mask_d16_sse4_1)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, BuildCompDiffwtdMaskD16Test, + BuildParams(av1_build_compound_diffwtd_mask_d16_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, BuildCompDiffwtdMaskD16Test, + BuildParams(av1_build_compound_diffwtd_mask_d16_neon)); +#endif + +} // namespace diff --git a/third_party/aom/test/register_state_check.h b/third_party/aom/test/register_state_check.h new file mode 100644 index 0000000000..4aad81469e --- /dev/null +++ b/third_party/aom/test/register_state_check.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AOM_TEST_REGISTER_STATE_CHECK_H_ +#define AOM_TEST_REGISTER_STATE_CHECK_H_ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" + +#include "aom/aom_integer.h" + +// API_REGISTER_STATE_CHECK(function) +// Validates the environment pre & post function execution to ensure the +// environment is in a consistent state. 
This should be used with API +// functions and assembly functions which are not expected to fully restore +// the system state. +// See platform implementations of RegisterStateCheck and +// RegisterStateCheckMMX for details. + +#if defined(_WIN64) && AOM_ARCH_X86_64 + +#undef NOMINMAX +#define NOMINMAX +#undef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#include <winnt.h> + +inline bool operator==(const M128A &lhs, const M128A &rhs) { + return (lhs.Low == rhs.Low && lhs.High == rhs.High); +} + +namespace libaom_test { + +// Compares the state of xmm[6-15] at construction with their state at +// destruction. These registers should be preserved by the callee on +// Windows x64. +class RegisterStateCheck { + public: + RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); } + ~RegisterStateCheck() { Check(); } + + private: + static bool StoreRegisters(CONTEXT *const context) { + const HANDLE this_thread = GetCurrentThread(); + EXPECT_NE(this_thread, nullptr); + context->ContextFlags = CONTEXT_FLOATING_POINT; + const bool context_saved = GetThreadContext(this_thread, context) == TRUE; + EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError(); + return context_saved; + } + + // Compares the register state and reports a test failure for every xmm + // register that was modified. 
+ void Check() const { + ASSERT_TRUE(initialized_); + CONTEXT post_context; + ASSERT_TRUE(StoreRegisters(&post_context)); + + const M128A *xmm_pre = &pre_context_.Xmm6; + const M128A *xmm_post = &post_context.Xmm6; + for (int i = 6; i <= 15; ++i) { + EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!"; + ++xmm_pre; + ++xmm_post; + } + } + + bool initialized_; + CONTEXT pre_context_; +}; +} // namespace libaom_test + +#else + +namespace libaom_test { + +class RegisterStateCheck {}; +} // namespace libaom_test + +#endif // _WIN64 && AOM_ARCH_X86_64 + +#if (AOM_ARCH_X86 || AOM_ARCH_X86_64) && defined(__GNUC__) +namespace libaom_test { + +// Checks the FPU tag word pre/post execution to ensure emms has been called. +class RegisterStateCheckMMX { + public: + RegisterStateCheckMMX() { + __asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_)); + } + ~RegisterStateCheckMMX() { Check(); } + + private: + // Checks the FPU tag word pre/post execution, reporting a test failure if it + // is not cleared to 0xffff. 
+ void Check() const { + EXPECT_EQ(0xffff, pre_fpu_env_[4]) + << "FPU was in an inconsistent state prior to call"; + + uint16_t post_fpu_env[14]; + __asm__ volatile("fstenv %0" : "=rm"(post_fpu_env)); + EXPECT_EQ(0xffff, post_fpu_env[4]) + << "FPU was left in an inconsistent state after call"; + } + + uint16_t pre_fpu_env_[14]; +}; +} // namespace libaom_test + +#else +namespace libaom_test { + +class RegisterStateCheckMMX {}; +} // namespace libaom_test + +#endif // (AOM_ARCH_X86 || AOM_ARCH_X86_64) && defined(__GNUC__) + +#define API_REGISTER_STATE_CHECK(statement) \ + do { \ + libaom_test::RegisterStateCheck reg_check; \ + libaom_test::RegisterStateCheckMMX reg_check_mmx; \ + statement; \ + (void)reg_check_mmx; \ + (void)reg_check; \ + } while (false) + +#endif // AOM_TEST_REGISTER_STATE_CHECK_H_ diff --git a/third_party/aom/test/resize_test.cc b/third_party/aom/test/resize_test.cc new file mode 100644 index 0000000000..7bad45300a --- /dev/null +++ b/third_party/aom/test/resize_test.cc @@ -0,0 +1,1136 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <climits> +#include <vector> +#include "aom_dsp/aom_dsp_common.h" +#include "common/tools_common.h" +#include "av1/encoder/encoder.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" + +// Enable(1) or Disable(0) writing of the compressed bitstream. +#define WRITE_COMPRESSED_STREAM 0 + +namespace { + +#if WRITE_COMPRESSED_STREAM +static void mem_put_le16(char *const mem, unsigned int val) { + mem[0] = val; + mem[1] = val >> 8; +} + +static void mem_put_le32(char *const mem, unsigned int val) { + mem[0] = val; + mem[1] = val >> 8; + mem[2] = val >> 16; + mem[3] = val >> 24; +} + +static void write_ivf_file_header(const aom_codec_enc_cfg_t *const cfg, + int frame_cnt, FILE *const outfile) { + char header[32]; + + header[0] = 'D'; + header[1] = 'K'; + header[2] = 'I'; + header[3] = 'F'; + mem_put_le16(header + 4, 0); /* version */ + mem_put_le16(header + 6, 32); /* headersize */ + mem_put_le32(header + 8, AV1_FOURCC); /* fourcc (av1) */ + mem_put_le16(header + 12, cfg->g_w); /* width */ + mem_put_le16(header + 14, cfg->g_h); /* height */ + mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */ + mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */ + mem_put_le32(header + 24, frame_cnt); /* length */ + mem_put_le32(header + 28, 0); /* unused */ + + (void)fwrite(header, 1, 32, outfile); +} + +static void write_ivf_frame_size(FILE *const outfile, const size_t size) { + char header[4]; + mem_put_le32(header, static_cast<unsigned int>(size)); + (void)fwrite(header, 1, 4, outfile); +} + +static void write_ivf_frame_header(const aom_codec_cx_pkt_t *const pkt, + FILE *const outfile) { + char header[12]; + aom_codec_pts_t pts; + + if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return; + + pts = pkt->data.frame.pts; + 
mem_put_le32(header, static_cast<unsigned int>(pkt->data.frame.sz)); + mem_put_le32(header + 4, pts & 0xFFFFFFFF); + mem_put_le32(header + 8, pts >> 32); + + (void)fwrite(header, 1, 12, outfile); +} +#endif // WRITE_COMPRESSED_STREAM + +const unsigned int kInitialWidth = 320; +const unsigned int kInitialHeight = 240; + +struct FrameInfo { + FrameInfo(aom_codec_pts_t _pts, unsigned int _w, unsigned int _h) + : pts(_pts), w(_w), h(_h) {} + + aom_codec_pts_t pts; + unsigned int w; + unsigned int h; +}; + +void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w, + unsigned int initial_h, int flag_codec, + bool change_start_resln, unsigned int *w, + unsigned int *h) { + if (frame < 10) { + if (change_start_resln) { + *w = initial_w / 4; + *h = initial_h / 4; + } else { + *w = initial_w; + *h = initial_h; + } + return; + } + if (frame < 20) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 30) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 40) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 50) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 60) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 70) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 80) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 90) { + *w = initial_w / 2; + *h = initial_h / 2; + return; + } + if (frame < 100) { + *w = initial_w * 3 / 4; + *h = initial_h * 3 / 4; + return; + } + if (frame < 110) { + *w = initial_w; + *h = initial_h; + return; + } + // Go down very low + if (frame < 120) { + *w = initial_w / 4; + *h = initial_h / 4; + return; + } + if (flag_codec == 1) { + // Cases that only works for AV1. + // For AV1: Swap width and height of original. 
+ if (frame < 140) { + *w = initial_h; + *h = initial_w; + return; + } + } + *w = initial_w; + *h = initial_h; +} + +// DummyVideoSource-based source of 150 frames whose size is re-evaluated with +// ScaleForFrameNumber() at Begin() and on every Next(). +class ResizingVideoSource : public ::libaom_test::DummyVideoSource { + public: + ResizingVideoSource() { + SetSize(kInitialWidth, kInitialHeight); + limit_ = 150; + } + // Pattern selectors forwarded to ScaleForFrameNumber(). They carry defaults + // so Begin()/Next() never read indeterminate values when a test does not + // assign them; tests may still override both before running. + int flag_codec_ = 0; + bool change_start_resln_ = false; + ~ResizingVideoSource() override = default; + + protected: + void Begin() override { + frame_ = 0; + unsigned int width; + unsigned int height; + ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, flag_codec_, + change_start_resln_, &width, &height); + SetSize(width, height); + FillFrame(); + } + void Next() override { + ++frame_; + unsigned int width; + unsigned int height; + ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, flag_codec_, + change_start_resln_, &width, &height); + SetSize(width, height); + FillFrame(); + } +}; + +class ResizeTest + : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>, + public ::libaom_test::EncoderTest { + protected: + ResizeTest() : EncoderTest(GET_PARAM(0)) {} + + ~ResizeTest() override = default; + + void SetUp() override { InitializeConfig(GET_PARAM(1)); } + + void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + if (GET_PARAM(1) == ::libaom_test::kRealTime) { + encoder->Control(AV1E_SET_AQ_MODE, 3); + encoder->Control(AOME_SET_CPUUSED, 5); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + } + } + } + + void DecompressedFrameHook(const aom_image_t &img, + aom_codec_pts_t pts) override { + frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h)); + } + + std::vector<FrameInfo> frame_info_list_; +}; + +TEST_P(ResizeTest, TestExternalResizeWorks) { + ResizingVideoSource video; + video.flag_codec_ = 0; + video.change_start_resln_ = false; + cfg_.g_lag_in_frames = 0; + // We use max(kInitialWidth, kInitialHeight) because during the test + // the width and height of the frame are swapped 
+ cfg_.g_forced_max_frame_width = cfg_.g_forced_max_frame_height = + AOMMAX(kInitialWidth, kInitialHeight); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // Check we decoded the same number of frames as we attempted to encode + ASSERT_EQ(frame_info_list_.size(), video.limit()); + + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const unsigned int frame = static_cast<unsigned>(info->pts); + unsigned int expected_w; + unsigned int expected_h; + ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, video.flag_codec_, + video.change_start_resln_, &expected_w, &expected_h); + EXPECT_EQ(expected_w, info->w) + << "Frame " << frame << " had unexpected width"; + EXPECT_EQ(expected_h, info->h) + << "Frame " << frame << " had unexpected height"; + } +} + +#if !CONFIG_REALTIME_ONLY +const unsigned int kStepDownFrame = 3; +const unsigned int kStepUpFrame = 6; + +class ResizeInternalTestLarge : public ResizeTest { + protected: +#if WRITE_COMPRESSED_STREAM + ResizeInternalTestLarge() + : ResizeTest(), frame0_psnr_(0.0), outfile_(nullptr), out_frames_(0) {} +#else + ResizeInternalTestLarge() : ResizeTest(), frame0_psnr_(0.0) {} +#endif + + ~ResizeInternalTestLarge() override = default; + + void BeginPassHook(unsigned int /*pass*/) override { +#if WRITE_COMPRESSED_STREAM + outfile_ = fopen("av10-2-05-resize.ivf", "wb"); +#endif + } + + void EndPassHook() override { +#if WRITE_COMPRESSED_STREAM + if (outfile_) { + if (!fseek(outfile_, 0, SEEK_SET)) + write_ivf_file_header(&cfg_, out_frames_, outfile_); + fclose(outfile_); + outfile_ = nullptr; + } +#endif + } + + void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) override { + if (change_config_) { + int new_q = 60; + if (video->frame() == 0) { + struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO }; + encoder->Control(AOME_SET_SCALEMODE, &mode); + } else if (video->frame() == 1) { + struct aom_scaling_mode 
mode = { AOME_NORMAL, AOME_NORMAL }; + encoder->Control(AOME_SET_SCALEMODE, &mode); + cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q; + encoder->Config(&cfg_); + } + } else { + if (video->frame() >= kStepDownFrame && video->frame() < kStepUpFrame) { + struct aom_scaling_mode mode = { AOME_FOURFIVE, AOME_THREEFIVE }; + encoder->Control(AOME_SET_SCALEMODE, &mode); + } + if (video->frame() >= kStepUpFrame) { + struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL }; + encoder->Control(AOME_SET_SCALEMODE, &mode); + } + } + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0]; + EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 4.1); + } + +#if WRITE_COMPRESSED_STREAM + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + ++out_frames_; + + // Write initial file header if first frame. + if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_); + + // Write frame header and data. + write_ivf_frame_header(pkt, outfile_); + (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_); + } +#endif + + double frame0_psnr_; + bool change_config_; +#if WRITE_COMPRESSED_STREAM + FILE *outfile_; + unsigned int out_frames_; +#endif +}; + +TEST_P(ResizeInternalTestLarge, TestInternalResizeWorks) { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 10); + init_flags_ = AOM_CODEC_USE_PSNR; + change_config_ = false; + + // q picked such that initial keyframe on this clip is ~30dB PSNR + cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48; + + // If the number of frames being encoded is smaller than g_lag_in_frames + // the encoded frame is unavailable using the current API. Comparing + // frames to detect mismatch would then not be possible. Set + // g_lag_in_frames = 0 to get around this. 
+ cfg_.g_lag_in_frames = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // Frames in [kStepDownFrame, kStepUpFrame) are encoded with the + // AOME_FOURFIVE x AOME_THREEFIVE scaling mode and must decode as 282x173; + // every other frame must keep the 352x288 source size. + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const aom_codec_pts_t pts = info->pts; + if (pts >= kStepDownFrame && pts < kStepUpFrame) { + ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width"; + ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height"; + } else { + EXPECT_EQ(352U, info->w) << "Frame " << pts << " had unexpected width"; + EXPECT_EQ(288U, info->h) << "Frame " << pts << " had unexpected height"; + } + } +} + +TEST_P(ResizeInternalTestLarge, TestInternalResizeChangeConfig) { + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 10); + cfg_.g_w = 352; + cfg_.g_h = 288; + change_config_ = true; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +AV1_INSTANTIATE_TEST_SUITE(ResizeInternalTestLarge, + ::testing::Values(::libaom_test::kOnePassGood)); +#endif + +// Parameters: test mode, speed, threads +class ResizeRealtimeTest + : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int, + int>, + public ::libaom_test::EncoderTest { + protected: + ResizeRealtimeTest() + : EncoderTest(GET_PARAM(0)), num_threads_(GET_PARAM(3)), + set_scale_mode_(false), set_scale_mode2_(false), + set_scale_mode3_(false) {} + ~ResizeRealtimeTest() override = default; + + void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_AQ_MODE, 3); + encoder->Control(AV1E_SET_ALLOW_WARPED_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_OBMC, 0); + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + } + if (set_scale_mode_) 
{ + struct aom_scaling_mode mode; + if (video->frame() <= 20) + mode = { AOME_ONETWO, AOME_ONETWO }; + else if (video->frame() <= 40) + mode = { AOME_ONEFOUR, AOME_ONEFOUR }; + else if (video->frame() > 40) + mode = { AOME_NORMAL, AOME_NORMAL }; + encoder->Control(AOME_SET_SCALEMODE, &mode); + } else if (set_scale_mode2_) { + struct aom_scaling_mode mode; + if (video->frame() <= 20) + mode = { AOME_ONEFOUR, AOME_ONEFOUR }; + else if (video->frame() <= 40) + mode = { AOME_ONETWO, AOME_ONETWO }; + else if (video->frame() > 40) + mode = { AOME_THREEFOUR, AOME_THREEFOUR }; + encoder->Control(AOME_SET_SCALEMODE, &mode); + } else if (set_scale_mode3_) { + struct aom_scaling_mode mode; + if (video->frame() <= 30) + mode = { AOME_ONETWO, AOME_NORMAL }; + else + mode = { AOME_NORMAL, AOME_NORMAL }; + encoder->Control(AOME_SET_SCALEMODE, &mode); + } + + if (change_bitrate_ && video->frame() == frame_change_bitrate_) { + change_bitrate_ = false; + cfg_.rc_target_bitrate = 500; + encoder->Config(&cfg_); + } + } + + void SetUp() override { + InitializeConfig(GET_PARAM(1)); + set_cpu_used_ = GET_PARAM(2); + } + + void DecompressedFrameHook(const aom_image_t &img, + aom_codec_pts_t pts) override { + frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h)); + } + + void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) override { + double mismatch_psnr = compute_psnr(img1, img2); + mismatch_psnr_ += mismatch_psnr; + ++mismatch_nframes_; + } + + unsigned int GetMismatchFrames() { return mismatch_nframes_; } + + void DefaultConfig() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_end_usage = AOM_CBR; + cfg_.kf_mode = AOM_KF_AUTO; + cfg_.g_lag_in_frames = 0; + cfg_.kf_min_dist = cfg_.kf_max_dist = 3000; + // Enable dropped frames. + cfg_.rc_dropframe_thresh = 1; + // Disable error_resilience mode. 
+ cfg_.g_error_resilient = 0; + cfg_.g_threads = num_threads_; + // Run at low bitrate. + cfg_.rc_target_bitrate = 200; + // We use max(kInitialWidth, kInitialHeight) because during the test + // the width and height of the frame are swapped + cfg_.g_forced_max_frame_width = cfg_.g_forced_max_frame_height = + AOMMAX(kInitialWidth, kInitialHeight); + if (set_scale_mode_ || set_scale_mode2_ || set_scale_mode3_) { + cfg_.rc_dropframe_thresh = 0; + cfg_.g_forced_max_frame_width = 1280; + cfg_.g_forced_max_frame_height = 1280; + } + } + + std::vector<FrameInfo> frame_info_list_; + int set_cpu_used_; + int num_threads_; + bool change_bitrate_; + unsigned int frame_change_bitrate_; + double mismatch_psnr_; + int mismatch_nframes_; + bool set_scale_mode_; + bool set_scale_mode2_; + bool set_scale_mode3_; +}; + +// Check the AOME_SET_SCALEMODE control by downsizing to +// 1/2, then 1/4, and then back up to originsal. +TEST_P(ResizeRealtimeTest, TestInternalResizeSetScaleMode1) { + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + cfg_.g_w = 1280; + cfg_.g_h = 720; + set_scale_mode_ = true; + set_scale_mode2_ = false; + set_scale_mode3_ = false; + DefaultConfig(); + change_bitrate_ = false; + mismatch_nframes_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // Check we decoded the same number of frames as we attempted to encode + ASSERT_EQ(frame_info_list_.size(), video.limit()); + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const auto frame = static_cast<unsigned>(info->pts); + unsigned int expected_w = 1280 >> 1; + unsigned int expected_h = 720 >> 1; + if (frame > 40) { + expected_w = 1280; + expected_h = 720; + } else if (frame > 20 && frame <= 40) { + expected_w = 1280 >> 2; + expected_h = 720 >> 2; + } + EXPECT_EQ(expected_w, info->w) + << "Frame " << frame << " had unexpected width"; + EXPECT_EQ(expected_h, info->h) + << "Frame " << frame << " had unexpected height"; 
+ EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + } +} + +// Check the AOME_SET_SCALEMODE control by downsizing to +// 1/2, then 1/4, and then back up to originsal. +TEST_P(ResizeRealtimeTest, TestInternalResizeSetScaleMode1QVGA) { + ::libaom_test::I420VideoSource video("desktop1.320_180.yuv", 320, 180, 30, 1, + 0, 80); + cfg_.g_w = 320; + cfg_.g_h = 180; + set_scale_mode_ = true; + set_scale_mode2_ = false; + set_scale_mode3_ = false; + DefaultConfig(); + change_bitrate_ = false; + mismatch_nframes_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // Check we decoded the same number of frames as we attempted to encode + ASSERT_EQ(frame_info_list_.size(), video.limit()); + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const auto frame = static_cast<unsigned>(info->pts); + unsigned int expected_w = 320 >> 1; + unsigned int expected_h = 180 >> 1; + if (frame > 40) { + expected_w = 320; + expected_h = 180; + } else if (frame > 20 && frame <= 40) { + expected_w = 320 >> 2; + expected_h = 180 >> 2; + } + EXPECT_EQ(expected_w, info->w) + << "Frame " << frame << " had unexpected width"; + EXPECT_EQ(expected_h, info->h) + << "Frame " << frame << " had unexpected height"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + } +} + +// Check the AOME_SET_SCALEMODE control by downsizing to +// 1/4, then 1/2, and then up to 3/4. 
+TEST_P(ResizeRealtimeTest, TestInternalResizeSetScaleMode2) { + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + cfg_.g_w = 1280; + cfg_.g_h = 720; + set_scale_mode_ = false; + set_scale_mode2_ = true; + set_scale_mode3_ = false; + DefaultConfig(); + change_bitrate_ = false; + mismatch_nframes_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // Check we decoded the same number of frames as we attempted to encode + ASSERT_EQ(frame_info_list_.size(), video.limit()); + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const auto frame = static_cast<unsigned>(info->pts); + unsigned int expected_w = 1280 >> 2; + unsigned int expected_h = 720 >> 2; + if (frame > 40) { + expected_w = (3 * 1280) >> 2; + expected_h = (3 * 720) >> 2; + } else if (frame > 20 && frame <= 40) { + expected_w = 1280 >> 1; + expected_h = 720 >> 1; + } + EXPECT_EQ(expected_w, info->w) + << "Frame " << frame << " had unexpected width"; + EXPECT_EQ(expected_h, info->h) + << "Frame " << frame << " had unexpected height"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + } +} + +// Check the AOME_SET_SCALEMODE control by downsizing to +// 1/2 horizontally only and then back up to original. 
+TEST_P(ResizeRealtimeTest, TestInternalResizeSetScaleMode3) { + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + cfg_.g_w = 1280; + cfg_.g_h = 720; + set_scale_mode_ = false; + set_scale_mode2_ = false; + set_scale_mode3_ = true; + DefaultConfig(); + change_bitrate_ = false; + mismatch_nframes_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // Check we decoded the same number of frames as we attempted to encode + ASSERT_EQ(frame_info_list_.size(), video.limit()); + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const auto frame = static_cast<unsigned>(info->pts); + unsigned int expected_w = 640; + unsigned int expected_h = 720; + if (frame > 30) { + expected_w = 1280; + expected_h = 720; + } + EXPECT_EQ(expected_w, info->w) + << "Frame " << frame << " had unexpected width"; + EXPECT_EQ(expected_h, info->h) + << "Frame " << frame << " had unexpected height"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + } +} + +TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) { + ResizingVideoSource video; + video.flag_codec_ = 1; + change_bitrate_ = false; + set_scale_mode_ = false; + set_scale_mode2_ = false; + set_scale_mode3_ = false; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; + DefaultConfig(); + // Test external resizing with start resolution equal to + // 1. kInitialWidth and kInitialHeight + // 2. 
down-scaled kInitialWidth and kInitialHeight + for (int i = 0; i < 2; i++) { + video.change_start_resln_ = static_cast<bool>(i); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // Check we decoded the same number of frames as we attempted to encode + ASSERT_EQ(frame_info_list_.size(), video.limit()); + for (const auto &info : frame_info_list_) { + const unsigned int frame = static_cast<unsigned>(info.pts); + unsigned int expected_w; + unsigned int expected_h; + ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, + video.flag_codec_, video.change_start_resln_, + &expected_w, &expected_h); + EXPECT_EQ(expected_w, info.w) + << "Frame " << frame << " had unexpected width"; + EXPECT_EQ(expected_h, info.h) + << "Frame " << frame << " had unexpected height"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + } + frame_info_list_.clear(); + } +} + +// Verify the dynamic resizer behavior for real time, 1 pass CBR mode. +// Run at low bitrate, with resize_allowed = 1, and verify that we get +// one resize down event. +TEST_P(ResizeRealtimeTest, TestInternalResizeDown) { + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + cfg_.g_w = 640; + cfg_.g_h = 480; + change_bitrate_ = false; + set_scale_mode_ = false; + set_scale_mode2_ = false; + set_scale_mode3_ = false; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; + DefaultConfig(); + // Disable dropped frames. + cfg_.rc_dropframe_thresh = 0; + // Starting bitrate low. + cfg_.rc_target_bitrate = 150; + cfg_.rc_resize_mode = RESIZE_DYNAMIC; + cfg_.g_forced_max_frame_width = 1280; + cfg_.g_forced_max_frame_height = 1280; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + unsigned int last_w = cfg_.g_w; + unsigned int last_h = cfg_.g_h; + int resize_down_count = 0; + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + if (info->w != last_w || info->h != last_h) { + // Verify that resize down occurs. 
+ if (info->w < last_w && info->h < last_h) { + resize_down_count++; + } + last_w = info->w; + last_h = info->h; + } + } + +#if CONFIG_AV1_DECODER + // Verify that we get at lease 1 resize down event in this test. + ASSERT_GE(resize_down_count, 1) << "Resizing should occur."; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); +#else + printf("Warning: AV1 decoder unavailable, unable to check resize count!\n"); +#endif +} + +// Verify the dynamic resizer behavior for real time, 1 pass CBR mode. +// Start at low target bitrate, raise the bitrate in the middle of the clip +// (at frame# = frame_change_bitrate_), scaling-up should occur after bitrate +// is increased. +TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) { + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + cfg_.g_w = 640; + cfg_.g_h = 480; + change_bitrate_ = true; + frame_change_bitrate_ = 120; + set_scale_mode_ = false; + set_scale_mode2_ = false; + set_scale_mode3_ = false; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; + DefaultConfig(); + // Disable dropped frames. + cfg_.rc_dropframe_thresh = 0; + // Starting bitrate low. + cfg_.rc_target_bitrate = 150; + cfg_.rc_resize_mode = RESIZE_DYNAMIC; + cfg_.g_forced_max_frame_width = 1280; + cfg_.g_forced_max_frame_height = 1280; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + unsigned int last_w = cfg_.g_w; + unsigned int last_h = cfg_.g_h; + unsigned int frame_number = 0; + int resize_down_count = 0; + int resize_up_count = 0; + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + if (info->w != last_w || info->h != last_h) { + if (frame_number < frame_change_bitrate_) { + // Verify that resize down occurs, before bitrate is increased. + ASSERT_LT(info->w, last_w); + ASSERT_LT(info->h, last_h); + resize_down_count++; + } else { + // Verify that resize up occurs, after bitrate is increased. 
+ ASSERT_GT(info->w, last_w); + ASSERT_GT(info->h, last_h); + resize_up_count++; + } + last_w = info->w; + last_h = info->h; + } + frame_number++; + } + +#if CONFIG_AV1_DECODER + // Verify that we get at least 2 resize events in this test. + ASSERT_GE(resize_up_count, 1) << "Resizing up should occur at lease once."; + ASSERT_GE(resize_down_count, 1) + << "Resizing down should occur at lease once."; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); +#else + printf("Warning: AV1 decoder unavailable, unable to check resize count!\n"); +#endif +} + +class ResizeCspTest : public ResizeTest { + protected: +#if WRITE_COMPRESSED_STREAM + ResizeCspTest() + : ResizeTest(), frame0_psnr_(0.0), outfile_(nullptr), out_frames_(0) {} +#else + ResizeCspTest() : ResizeTest(), frame0_psnr_(0.0) {} +#endif + + ~ResizeCspTest() override = default; + + void BeginPassHook(unsigned int /*pass*/) override { +#if WRITE_COMPRESSED_STREAM + outfile_ = fopen("av11-2-05-cspchape.ivf", "wb"); +#endif + } + + void EndPassHook() override { +#if WRITE_COMPRESSED_STREAM + if (outfile_) { + if (!fseek(outfile_, 0, SEEK_SET)) + write_ivf_file_header(&cfg_, out_frames_, outfile_); + fclose(outfile_); + outfile_ = nullptr; + } +#endif + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0]; + EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0); + } + +#if WRITE_COMPRESSED_STREAM + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + ++out_frames_; + + // Write initial file header if first frame. + if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_); + + // Write frame header and data. 
+ write_ivf_frame_header(pkt, outfile_); + (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_); + } +#endif + + double frame0_psnr_; +#if WRITE_COMPRESSED_STREAM + FILE *outfile_; + unsigned int out_frames_; +#endif +}; + +class ResizingCspVideoSource : public ::libaom_test::DummyVideoSource { + public: + explicit ResizingCspVideoSource(aom_img_fmt_t image_format) { + SetSize(kInitialWidth, kInitialHeight); + SetImageFormat(image_format); + limit_ = 30; + } + + ~ResizingCspVideoSource() override = default; +}; + +#if (defined(DISABLE_TRELLISQ_SEARCH) && DISABLE_TRELLISQ_SEARCH) || \ + (defined(CONFIG_MAX_DECODE_PROFILE) && CONFIG_MAX_DECODE_PROFILE < 1) +TEST_P(ResizeCspTest, DISABLED_TestResizeCspWorks) { +#else +TEST_P(ResizeCspTest, TestResizeCspWorks) { +#endif + const aom_img_fmt_t image_formats[] = { AOM_IMG_FMT_I420, AOM_IMG_FMT_I444 }; + for (const aom_img_fmt_t &img_format : image_formats) { + ResizingCspVideoSource video(img_format); + init_flags_ = AOM_CODEC_USE_PSNR; + cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48; + cfg_.g_lag_in_frames = 0; + cfg_.g_profile = (img_format == AOM_IMG_FMT_I420) ? 
0 : 1; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // Check we decoded the same number of frames as we attempted to encode + ASSERT_EQ(frame_info_list_.size(), video.limit()); + frame_info_list_.clear(); + } +} + +#if !CONFIG_REALTIME_ONLY +// This class is used to check if there are any fatal +// failures while encoding with resize-mode > 0 +class ResizeModeTestLarge + : public ::libaom_test::CodecTestWith5Params<libaom_test::TestMode, int, + int, int, int>, + public ::libaom_test::EncoderTest { + protected: + ResizeModeTestLarge() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + resize_mode_(GET_PARAM(2)), resize_denominator_(GET_PARAM(3)), + resize_kf_denominator_(GET_PARAM(4)), cpu_used_(GET_PARAM(5)) {} + ~ResizeModeTestLarge() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = AOM_VBR; + cfg_.g_threads = 1; + cfg_.g_lag_in_frames = 35; + cfg_.rc_target_bitrate = 1000; + cfg_.rc_resize_mode = resize_mode_; + cfg_.rc_resize_denominator = resize_denominator_; + cfg_.rc_resize_kf_denominator = resize_kf_denominator_; + init_flags_ = AOM_CODEC_USE_PSNR; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + } + } + + ::libaom_test::TestMode encoding_mode_; + int resize_mode_; + int resize_denominator_; + int resize_kf_denominator_; + int cpu_used_; +}; + +TEST_P(ResizeModeTestLarge, ResizeModeTest) { + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 30); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ResizeModeTestLarge); +AV1_INSTANTIATE_TEST_SUITE(ResizeModeTestLarge, + ::testing::Values(::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood), + ::testing::Values(1, 
2), ::testing::Values(8, 12), + ::testing::Values(10, 14), ::testing::Values(3, 6)); +#endif // !CONFIG_REALTIME_ONLY + +AV1_INSTANTIATE_TEST_SUITE(ResizeTest, + ::testing::Values(::libaom_test::kRealTime)); +AV1_INSTANTIATE_TEST_SUITE(ResizeRealtimeTest, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Range(6, 10), ::testing::Values(1, 2, 4)); +AV1_INSTANTIATE_TEST_SUITE(ResizeCspTest, + ::testing::Values(::libaom_test::kRealTime)); + +// A test that reproduces crbug.com/1393384. In realtime usage mode, encode +// frames of sizes 202x202, 1x202, and 202x202. ASan should report no memory +// errors. +TEST(ResizeSimpleTest, TemporarySmallerFrameSize) { + constexpr int kWidth = 202; + constexpr int kHeight = 202; + // Dummy buffer of zero samples. + constexpr size_t kBufferSize = + kWidth * kHeight + 2 * (kWidth + 1) / 2 * (kHeight + 1) / 2; + std::vector<unsigned char> buffer(kBufferSize); + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1, + buffer.data())); + aom_image_t img2; + EXPECT_EQ(&img2, aom_img_wrap(&img2, AOM_IMG_FMT_I420, 1, kHeight, 1, + buffer.data())); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME)); + cfg.g_w = kWidth; + cfg.g_h = kHeight; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 5)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + + cfg.g_w = 1; + cfg.g_h = kHeight; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img2, 1, 1, 0)); + + cfg.g_w = kWidth; + cfg.g_h = kHeight; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 2, 1, 0)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0)); 
+ EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +// A test that reproduces crbug.com/1410766. In realtime usage mode +// for SVC with temporal layers, encode frames of sizes 600x600, +// 600x600, and 100x480. ASan should report no memory errors. +TEST(ResizeSimpleTest, SmallerFrameSizeSVC) { + constexpr int kWidth = 600; + constexpr int kHeight = 600; + // Dummy buffer of zero samples. + constexpr size_t kBufferSize = + kWidth * kHeight + 2 * (kWidth + 1) / 2 * (kHeight + 1) / 2; + std::vector<unsigned char> buffer(kBufferSize); + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1, + buffer.data())); + aom_image_t img2; + EXPECT_EQ(&img2, + aom_img_wrap(&img2, AOM_IMG_FMT_I420, 100, 480, 1, buffer.data())); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME)); + cfg.g_w = kWidth; + cfg.g_h = kHeight; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 5)); + + aom_svc_params_t svc_params = {}; + aom_svc_layer_id_t layer_id; + svc_params.number_spatial_layers = 1; + svc_params.framerate_factor[0] = 2; + svc_params.framerate_factor[1] = 1; + svc_params.number_temporal_layers = 2; + // Bitrate allocation L0: 60% L1: 40% + svc_params.layer_target_bitrate[0] = 60 * cfg.rc_target_bitrate / 100; + svc_params.layer_target_bitrate[1] = cfg.rc_target_bitrate; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_SVC_PARAMS, &svc_params)); + + layer_id.spatial_layer_id = 0; + layer_id.temporal_layer_id = 0; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_SVC_LAYER_ID, &layer_id)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + + cfg.g_w = kWidth; + cfg.g_h = kHeight; + layer_id.temporal_layer_id = 1; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, 
AV1E_SET_SVC_LAYER_ID, &layer_id)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 1, 1, 0)); + + cfg.g_w = 100; + cfg.g_h = 480; + layer_id.temporal_layer_id = 0; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_SVC_LAYER_ID, &layer_id)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img2, 2, 1, 0)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +const int kUsages[] = +#if CONFIG_REALTIME_ONLY + { AOM_USAGE_REALTIME }; +#else + { AOM_USAGE_GOOD_QUALITY, AOM_USAGE_REALTIME, AOM_USAGE_ALL_INTRA }; +#endif + +const int kNumThreads[] = { 2, 4, 8 }; + +class FrameSizeChangeTest + : public ::libaom_test::CodecTestWith3Params<int, int, int> { + protected: + FrameSizeChangeTest() {} + ~FrameSizeChangeTest() override = default; + + void DoTest(int change_thread) { + usage_ = GET_PARAM(1); + cpu_used_ = GET_PARAM(2); + threads_ = GET_PARAM(3); + constexpr int kWidth = 512; + constexpr int kHeight = 512; + constexpr int kFirstWidth = 256; + constexpr int kFirstHeight = 256; + // Buffer of zero samples. + constexpr size_t kBufferSize = 3 * kWidth * kHeight; + std::vector<unsigned char> buffer(kBufferSize, + static_cast<unsigned char>(0)); + + aom_image_t img1; + EXPECT_EQ(&img1, aom_img_wrap(&img1, AOM_IMG_FMT_I420, kFirstWidth, + kFirstHeight, 1, buffer.data())); + + aom_image_t img2; + EXPECT_EQ(&img2, aom_img_wrap(&img2, AOM_IMG_FMT_I420, kWidth, kHeight, 1, + buffer.data())); + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(iface, &cfg, usage_)); + cfg.g_threads = threads_; + cfg.g_lag_in_frames = usage_ == AOM_USAGE_ALL_INTRA ? 
0 : 1; + cfg.g_w = kFirstWidth; + cfg.g_h = kFirstHeight; + cfg.g_forced_max_frame_width = kWidth; + cfg.g_forced_max_frame_height = kHeight; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_CPUUSED, cpu_used_)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img1, 0, 1, 0)); + + if (change_thread == 1) { + cfg.g_threads = AOMMAX(1, threads_ / 2); + } else if (change_thread == 2) { + cfg.g_threads = threads_ * 2; + } + cfg.g_w = kWidth; + cfg.g_h = kHeight; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_set(&enc, &cfg)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img2, 1, 1, 0)); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 0, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); + } + + int cpu_used_; + int threads_; + int usage_; +}; + +TEST_P(FrameSizeChangeTest, FixedThreads) { DoTest(0); } +TEST_P(FrameSizeChangeTest, DecreasingThreads) { DoTest(1); } +TEST_P(FrameSizeChangeTest, IncreasingThreads) { DoTest(2); } + +AV1_INSTANTIATE_TEST_SUITE(FrameSizeChangeTest, ::testing::ValuesIn(kUsages), + ::testing::Range(6, 7), + ::testing::ValuesIn(kNumThreads)); + +} // namespace diff --git a/third_party/aom/test/rt_end_to_end_test.cc b/third_party/aom/test/rt_end_to_end_test.cc new file mode 100644 index 0000000000..f1f9e019c2 --- /dev/null +++ b/third_party/aom/test/rt_end_to_end_test.cc @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <memory> +#include <ostream> +#include <string> +#include <unordered_map> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "test/yuv_video_source.h" + +namespace { + +const unsigned int kFrames = 10; +const int kBitrate = 500; + +// List of psnr thresholds for speed settings 6-8 +// keys: video, speed, aq mode. +std::unordered_map<std::string, + std::unordered_map<int, std::unordered_map<int, double>>> + kPsnrThreshold = { { "park_joy_90p_8_420.y4m", + { { 5, { { 0, 35.4 }, { 3, 36.3 } } }, + { 6, { { 0, 35.3 }, { 3, 36.2 } } }, + { 7, { { 0, 34.9 }, { 3, 35.8 } } }, + { 8, { { 0, 35.0 }, { 3, 35.8 } } }, + { 9, { { 0, 34.9 }, { 3, 35.5 } } }, + { 10, { { 0, 34.7 }, { 3, 35.3 } } } } }, + { "paris_352_288_30.y4m", + { { 5, { { 0, 36.2 }, { 3, 36.7 } } }, + { 6, { { 0, 36.1 }, { 3, 36.48 } } }, + { 7, { { 0, 35.5 }, { 3, 36.0 } } }, + { 8, { { 0, 35.8 }, { 3, 36.4 } } }, + { 9, { { 0, 35.5 }, { 3, 36.0 } } }, + { 10, { { 0, 35.3 }, { 3, 35.9 } } } } }, + { "niklas_1280_720_30.y4m", + { { 5, { { 0, 34.4 }, { 3, 34.2 } } }, + { 6, { { 0, 34.1 }, { 3, 34.0 } } }, + { 7, { { 0, 33.5 }, { 3, 33.1 } } }, + { 8, { { 0, 33.3 }, { 3, 33.3 } } }, + { 9, { { 0, 33.3 }, { 3, 33.3 } } }, + { 10, { { 0, 33.1 }, { 3, 33.1 } } } } }, + { "hantro_collage_w352h288_nv12.yuv", + { { 5, { { 0, 34.4 }, { 3, 34.2 } } }, + { 6, { { 0, 34.1 }, { 3, 34.1 } } }, + { 7, { { 0, 33.6 }, { 3, 33.6 } } }, + { 8, { { 0, 33.3 }, { 3, 33.3 } } }, + { 9, { { 0, 33.3 }, { 3, 33.3 } } }, + { 10, { { 0, 33.1 }, { 3, 33.1 } } } } } }; + +typedef struct { + const char *filename; + unsigned int input_bit_depth; + aom_img_fmt fmt; + aom_bit_depth_t bit_depth; + unsigned int profile; +} TestVideoParam; + +std::ostream &operator<<(std::ostream &os, const TestVideoParam &test_arg) { + return os << "TestVideoParam { filename:" << 
test_arg.filename + << " input_bit_depth:" << test_arg.input_bit_depth + << " fmt:" << test_arg.fmt << " bit_depth:" << test_arg.bit_depth + << " profile:" << test_arg.profile << " }"; +} + +const TestVideoParam kTestVectors[] = { + { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 }, + { "paris_352_288_30.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 }, + { "niklas_1280_720_30.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 }, + { "hantro_collage_w352h288_nv12.yuv", 8, AOM_IMG_FMT_NV12, AOM_BITS_8, 0 }, +}; + +// Params: test video, speed, aq mode, threads, tile columns, tile rows. +class RTEndToEndTest + : public ::libaom_test::CodecTestWith6Params<TestVideoParam, int, + unsigned int, int, int, int>, + public ::libaom_test::EncoderTest { + protected: + RTEndToEndTest() + : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), psnr_(0.0), nframes_(0), + aq_mode_(GET_PARAM(3)), threads_(GET_PARAM(4)), + tile_columns_(GET_PARAM(5)), tile_rows_(GET_PARAM(6)) {} + + ~RTEndToEndTest() override = default; + + void SetUp() override { + InitializeConfig(::libaom_test::kRealTime); + + cfg_.g_threads = threads_; + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.kf_max_dist = 9999; + cfg_.kf_min_dist = 9999; + } + + void BeginPassHook(unsigned int) override { + psnr_ = 0.0; + nframes_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_ENABLE_RESTORATION, 0); + encoder->Control(AV1E_SET_ENABLE_OBMC, 0); + encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0); + encoder->Control(AV1E_SET_ENABLE_WARPED_MOTION, 0); + encoder->Control(AV1E_SET_DELTAQ_MODE, 0); + encoder->Control(AV1E_SET_ENABLE_TPL_MODEL, 0); + encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1); + 
encoder->Control(AV1E_SET_TILE_COLUMNS, tile_columns_); + encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_); + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT); + encoder->Control(AV1E_SET_AQ_MODE, aq_mode_); + encoder->Control(AV1E_SET_ROW_MT, 1); + encoder->Control(AV1E_SET_ENABLE_CDEF, 1); + encoder->Control(AV1E_SET_COEFF_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_MODE_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_MV_COST_UPD_FREQ, 2); + encoder->Control(AV1E_SET_DV_COST_UPD_FREQ, 2); + } + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + double GetPsnrThreshold() { + return kPsnrThreshold[test_video_param_.filename][cpu_used_][aq_mode_]; + } + + void DoTest() { + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_error_resilient = 0; + cfg_.g_profile = test_video_param_.profile; + cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; + cfg_.g_bit_depth = test_video_param_.bit_depth; + init_flags_ = AOM_CODEC_USE_PSNR; + if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH; + + std::unique_ptr<libaom_test::VideoSource> video; + if (is_extension_y4m(test_video_param_.filename)) + video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0, + kFrames)); + else + video.reset(new libaom_test::YUVVideoSource(test_video_param_.filename, + test_video_param_.fmt, 352, + 288, 30, 1, 0, kFrames)); + ASSERT_NE(video, nullptr); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + const double psnr = GetAveragePsnr(); + EXPECT_GT(psnr, GetPsnrThreshold()) + << "cpu used = " << cpu_used_ << " aq mode = " << aq_mode_; + } + + TestVideoParam test_video_param_; + int cpu_used_; + + private: + double psnr_; + unsigned int nframes_; + unsigned int aq_mode_; + int threads_; + int tile_columns_; + int tile_rows_; +}; + +class RTEndToEndTestThreaded : public RTEndToEndTest {}; + +TEST_P(RTEndToEndTest, EndtoEndPSNRTest) { DoTest(); } + 
+TEST_P(RTEndToEndTestThreaded, EndtoEndPSNRTest) { DoTest(); } + +AV1_INSTANTIATE_TEST_SUITE(RTEndToEndTest, ::testing::ValuesIn(kTestVectors), + ::testing::Range(5, 12), + ::testing::Values<unsigned int>(0, 3), + ::testing::Values(1), ::testing::Values(1), + ::testing::Values(1)); + +AV1_INSTANTIATE_TEST_SUITE(RTEndToEndTestThreaded, + ::testing::ValuesIn(kTestVectors), + ::testing::Range(5, 12), + ::testing::Values<unsigned int>(0, 3), + ::testing::Range(2, 6), ::testing::Range(1, 5), + ::testing::Range(1, 5)); +} // namespace diff --git a/third_party/aom/test/run_encodes.sh b/third_party/aom/test/run_encodes.sh new file mode 100755 index 0000000000..2096d8b158 --- /dev/null +++ b/third_party/aom/test/run_encodes.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# +# Copyright (c) 2016, Alliance for Open Media. All rights reserved. +# +# This source code is subject to the terms of the BSD 2 Clause License and +# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +# was not distributed with this source code in the LICENSE file, you can +# obtain it at www.aomedia.org/license/software. If the Alliance for Open +# Media Patent License 1.0 was not distributed with this source code in the +# PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
#
# Author: jimbankoski@google.com (Jim Bankoski)

# Runs best_encode.sh on every ./*y4m file for each bitrate from start-kbps to
# end-kbps (inclusive, in increments of step-kbps), renames the opsnr.stt left
# behind for each input to <input>.stt, and finally moves all .stt and .webm
# files into output-directory.

if [[ $# -ne 4 ]]; then
  echo Encodes all the y4m files in the directory at the bitrates specified by
  echo the first 3 parameters and stores the results in a subdirectory named by
  echo the 4th parameter:
  echo
  echo Usage: run_encodes.sh start-kbps end-kbps step-kbps output-directory
  echo Example: run_encodes.sh 200 500 50 baseline
  # A wrong argument count is a usage error, so report it via the exit status.
  exit 1
fi

s=$1
e=$2
step=$3
newdir=$4

# A zero or non-numeric step would keep the bitrate loop below from ever
# terminating, so reject it up front.
if ! [[ $step =~ ^[0-9]+$ ]] || (( step <= 0 )); then
  echo "step-kbps must be a positive integer, got: $step" >&2
  exit 1
fi

for i in ./*y4m; do
  # When nothing matches, the glob is left as a literal string; skip it.
  [[ -e $i ]] || continue
  for (( b = s; b <= e; b += step ))
  do
    # Quote the path so file names containing spaces reach the encoder intact.
    best_encode.sh "$i" "$b"
  done
  mv opsnr.stt "$i.stt"
done

# -p accepts an already existing directory; if the directory still cannot be
# created, stop instead of letting mv rename results onto a non-directory.
mkdir -p "$newdir" || exit 1
mv ./*.stt "$newdir"
mv ./*.webm "$newdir"
diff --git a/third_party/aom/test/sad_test.cc b/third_party/aom/test/sad_test.cc new file mode 100644 index 0000000000..521274863c --- /dev/null +++ b/third_party/aom/test/sad_test.cc @@ -0,0 +1,3353 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */ + +#include <string.h> +#include <limits.h> +#include <stdio.h> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "aom/aom_codec.h" +#include "aom_mem/aom_mem.h" +#include "aom_ports/mem.h" + +typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride); +typedef std::tuple<int, int, SadMxNFunc, int> SadMxNParam; + +typedef unsigned int (*SadSkipMxNFunc)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride); +typedef std::tuple<int, int, SadSkipMxNFunc, int> SadSkipMxNParam; + +typedef uint32_t (*SadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + const uint8_t *second_pred); +typedef std::tuple<int, int, SadMxNAvgFunc, int> SadMxNAvgParam; + +typedef unsigned int (*DistWtdSadMxhFunc)(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, int width, + int height); +typedef std::tuple<int, int, DistWtdSadMxhFunc, int> DistWtdSadMxhParam; + +typedef uint32_t (*DistWtdSadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + const uint8_t *second_pred, + const DIST_WTD_COMP_PARAMS *jcp_param); +typedef std::tuple<int, int, DistWtdSadMxNAvgFunc, int> DistWtdSadMxNAvgParam; + +typedef void (*SadMxNx4Func)(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_ptr[], int ref_stride, + uint32_t *sad_array); +typedef std::tuple<int, int, SadMxNx4Func, int> SadMxNx4Param; + +typedef void (*SadSkipMxNx4Func)(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_ptr[], int ref_stride, + uint32_t *sad_array); +typedef std::tuple<int, int, SadSkipMxNx4Func, int> SadSkipMxNx4Param; + +typedef void (*SadMxNx4AvgFunc)(const uint8_t 
*src_ptr, int src_stride, + const uint8_t *const ref_ptr[], int ref_stride, + const uint8_t *second_pred, + uint32_t *sad_array); +typedef std::tuple<int, int, SadMxNx4AvgFunc, int> SadMxNx4AvgParam; + +using libaom_test::ACMRandom; + +namespace { +class SADTestBase : public ::testing::Test { + public: + SADTestBase(int width, int height, int bit_depth) + : width_(width), height_(height), bd_(bit_depth) {} + + static void SetUpTestSuite() { + source_data8_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kDataBlockSize)); + ASSERT_NE(source_data8_, nullptr); + reference_data8_ = reinterpret_cast<uint8_t *>( + aom_memalign(kDataAlignment, kDataBufferSize)); + ASSERT_NE(reference_data8_, nullptr); + second_pred8_ = + reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128)); + ASSERT_NE(second_pred8_, nullptr); + comp_pred8_ = + reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128)); + ASSERT_NE(comp_pred8_, nullptr); + comp_pred8_test_ = + reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128)); + ASSERT_NE(comp_pred8_test_, nullptr); + source_data16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, kDataBlockSize * sizeof(uint16_t))); + ASSERT_NE(source_data16_, nullptr); + reference_data16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, kDataBufferSize * sizeof(uint16_t))); + ASSERT_NE(reference_data16_, nullptr); + second_pred16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t))); + ASSERT_NE(second_pred16_, nullptr); + comp_pred16_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t))); + ASSERT_NE(comp_pred16_, nullptr); + comp_pred16_test_ = reinterpret_cast<uint16_t *>( + aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t))); + ASSERT_NE(comp_pred16_test_, nullptr); + } + + static void TearDownTestSuite() { + aom_free(source_data8_); + source_data8_ = nullptr; + aom_free(reference_data8_); + 
reference_data8_ = nullptr; + aom_free(second_pred8_); + second_pred8_ = nullptr; + aom_free(comp_pred8_); + comp_pred8_ = nullptr; + aom_free(comp_pred8_test_); + comp_pred8_test_ = nullptr; + aom_free(source_data16_); + source_data16_ = nullptr; + aom_free(reference_data16_); + reference_data16_ = nullptr; + aom_free(second_pred16_); + second_pred16_ = nullptr; + aom_free(comp_pred16_); + comp_pred16_ = nullptr; + aom_free(comp_pred16_test_); + comp_pred16_test_ = nullptr; + } + + protected: + // Handle up to 4 128x128 blocks, with stride up to 256 + static const int kDataAlignment = 16; + static const int kDataBlockSize = 128 * 256; + static const int kDataBufferSize = 4 * kDataBlockSize; + + void SetUp() override { + if (bd_ == -1) { + use_high_bit_depth_ = false; + bit_depth_ = AOM_BITS_8; + source_data_ = source_data8_; + reference_data_ = reference_data8_; + second_pred_ = second_pred8_; + comp_pred_ = comp_pred8_; + comp_pred_test_ = comp_pred8_test_; + } else { + use_high_bit_depth_ = true; + bit_depth_ = static_cast<aom_bit_depth_t>(bd_); + source_data_ = CONVERT_TO_BYTEPTR(source_data16_); + reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_); + second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_); + comp_pred_ = CONVERT_TO_BYTEPTR(comp_pred16_); + comp_pred_test_ = CONVERT_TO_BYTEPTR(comp_pred16_test_); + } + mask_ = (1 << bit_depth_) - 1; + source_stride_ = (width_ + 31) & ~31; + reference_stride_ = width_ * 2; + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + virtual uint8_t *GetReference(int block_idx) { + if (use_high_bit_depth_) + return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) + + block_idx * kDataBlockSize); + return reference_data_ + block_idx * kDataBlockSize; + } + + // Sum of Absolute Differences. Given two blocks, calculate the absolute + // difference between two pixels in the same relative location; accumulate. 
+ unsigned int ReferenceSAD(int block_idx) { + unsigned int sad = 0; + const uint8_t *const reference8 = GetReference(block_idx); + const uint8_t *const source8 = source_data_; + const uint16_t *const reference16 = + CONVERT_TO_SHORTPTR(GetReference(block_idx)); + const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_); + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + sad += abs(source8[h * source_stride_ + w] - + reference8[h * reference_stride_ + w]); + } else { + sad += abs(source16[h * source_stride_ + w] - + reference16[h * reference_stride_ + w]); + } + } + } + return sad; + } + + // Sum of Absolute Differences Skip rows. Given two blocks, + // calculate the absolute difference between two pixels in the same + // relative location every other row; accumulate and double the result at the + // end. + unsigned int ReferenceSADSkip(int block_idx) { + unsigned int sad = 0; + const uint8_t *const reference8 = GetReference(block_idx); + const uint8_t *const source8 = source_data_; + const uint16_t *const reference16 = + CONVERT_TO_SHORTPTR(GetReference(block_idx)); + const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_); + for (int h = 0; h < height_; h += 2) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + sad += abs(source8[h * source_stride_ + w] - + reference8[h * reference_stride_ + w]); + } else { + sad += abs(source16[h * source_stride_ + w] - + reference16[h * reference_stride_ + w]); + } + } + } + return sad * 2; + } + + // Sum of Absolute Differences Average. Given two blocks, and a prediction + // calculate the absolute difference between one pixel and average of the + // corresponding and predicted pixels; accumulate. 
+ unsigned int ReferenceSADavg(int block_idx) { + unsigned int sad = 0; + const uint8_t *const reference8 = GetReference(block_idx); + const uint8_t *const source8 = source_data_; + const uint8_t *const second_pred8 = second_pred_; + const uint16_t *const reference16 = + CONVERT_TO_SHORTPTR(GetReference(block_idx)); + const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_); + const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_); + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + const int tmp = second_pred8[h * width_ + w] + + reference8[h * reference_stride_ + w]; + const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1); + sad += abs(source8[h * source_stride_ + w] - comp_pred); + } else { + const int tmp = second_pred16[h * width_ + w] + + reference16[h * reference_stride_ + w]; + const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1); + sad += abs(source16[h * source_stride_ + w] - comp_pred); + } + } + } + return sad; + } + + unsigned int ReferenceDistWtdSADavg(int block_idx) { + unsigned int sad = 0; + const uint8_t *const reference8 = GetReference(block_idx); + const uint8_t *const source8 = source_data_; + const uint8_t *const second_pred8 = second_pred_; + const uint16_t *const reference16 = + CONVERT_TO_SHORTPTR(GetReference(block_idx)); + const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_); + const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_); + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + const int tmp = + second_pred8[h * width_ + w] * jcp_param_.bck_offset + + reference8[h * reference_stride_ + w] * jcp_param_.fwd_offset; + const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 4); + sad += abs(source8[h * source_stride_ + w] - comp_pred); + } else { + const int tmp = + second_pred16[h * width_ + w] * jcp_param_.bck_offset + + reference16[h * reference_stride_ + w] * 
jcp_param_.fwd_offset; + const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 4); + sad += abs(source16[h * source_stride_ + w] - comp_pred); + } + } + } + return sad; + } + + void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) { + uint8_t *data8 = data; + uint16_t *data16 = CONVERT_TO_SHORTPTR(data); + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + data8[h * stride + w] = static_cast<uint8_t>(fill_constant); + } else { + data16[h * stride + w] = fill_constant; + } + } + } + } + + void FillRandom(uint8_t *data, int stride) { + uint8_t *data8 = data; + uint16_t *data16 = CONVERT_TO_SHORTPTR(data); + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + if (!use_high_bit_depth_) { + data8[h * stride + w] = rnd_.Rand8(); + } else { + data16[h * stride + w] = rnd_.Rand16() & mask_; + } + } + } + } + + virtual void SADForSpeedTest(unsigned int *results, + const uint8_t *const *references) { + (void)results; + (void)references; + } + + void SpeedSAD() { + int test_count = 20000000; + unsigned int exp_sad[4]; + const uint8_t *references[] = { GetReference(0), GetReference(1), + GetReference(2), GetReference(3) }; + aom_usec_timer timer; + aom_usec_timer_start(&timer); + while (test_count > 0) { + SADForSpeedTest(exp_sad, references); + test_count -= 1; + } + aom_usec_timer_mark(&timer); + const int64_t time = aom_usec_timer_elapsed(&timer) / 1000; + std::cout << "BLOCK_" << width_ << "X" << height_ + << ", bit_depth:" << bit_depth_ << ",Time: " << time << "ms" + << std::endl; + } + + int width_, height_, mask_, bd_; + aom_bit_depth_t bit_depth_; + static uint8_t *source_data_; + static uint8_t *reference_data_; + static uint8_t *second_pred_; + int source_stride_; + bool use_high_bit_depth_; + static uint8_t *source_data8_; + static uint8_t *reference_data8_; + static uint8_t *second_pred8_; + static uint16_t *source_data16_; + static uint16_t *reference_data16_; + static 
uint16_t *second_pred16_; + int reference_stride_; + static uint8_t *comp_pred_; + static uint8_t *comp_pred8_; + static uint16_t *comp_pred16_; + static uint8_t *comp_pred_test_; + static uint8_t *comp_pred8_test_; + static uint16_t *comp_pred16_test_; + DIST_WTD_COMP_PARAMS jcp_param_; + + ACMRandom rnd_; +}; + +class SADx4Test : public ::testing::WithParamInterface<SadMxNx4Param>, + public SADTestBase { + public: + SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {} + + protected: + void SADs(unsigned int *results) { + const uint8_t *references[] = { GetReference(0), GetReference(1), + GetReference(2), GetReference(3) }; + + API_REGISTER_STATE_CHECK(GET_PARAM(2)( + source_data_, source_stride_, references, reference_stride_, results)); + } + + void SADForSpeedTest(unsigned int *results, + const uint8_t *const *references) override { + GET_PARAM(2) + (source_data_, source_stride_, references, reference_stride_, results); + } + + void CheckSADs() { + unsigned int reference_sad, exp_sad[4]; + SADs(exp_sad); + for (int block = 0; block < 4; ++block) { + reference_sad = ReferenceSAD(block); + + EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block; + } + } +}; + +class SADx3Test : public ::testing::WithParamInterface<SadMxNx4Param>, + public SADTestBase { + public: + SADx3Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {} + + protected: + void SADs(unsigned int *results) { + const uint8_t *references[] = { GetReference(0), GetReference(1), + GetReference(2), GetReference(3) }; + + GET_PARAM(2) + (source_data_, source_stride_, references, reference_stride_, results); + } + + void SADForSpeedTest(unsigned int *results, + const uint8_t *const *references) override { + GET_PARAM(2) + (source_data_, source_stride_, references, reference_stride_, results); + } + + void CheckSADs() { + unsigned int reference_sad, exp_sad[4]; + + SADs(exp_sad); + for (int block = 0; block < 3; ++block) { + reference_sad = ReferenceSAD(block); + + 
EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block; + } + } +}; + +class SADSkipx4Test : public ::testing::WithParamInterface<SadMxNx4Param>, + public SADTestBase { + public: + SADSkipx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {} + + protected: + void SADs(unsigned int *results) { + const uint8_t *references[] = { GetReference(0), GetReference(1), + GetReference(2), GetReference(3) }; + + API_REGISTER_STATE_CHECK(GET_PARAM(2)( + source_data_, source_stride_, references, reference_stride_, results)); + } + + void CheckSADs() { + unsigned int reference_sad, exp_sad[4]; + + SADs(exp_sad); + for (int block = 0; block < 4; ++block) { + reference_sad = ReferenceSADSkip(block); + + EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block; + } + } + + void SADForSpeedTest(unsigned int *results, + const uint8_t *const *references) override { + GET_PARAM(2) + (source_data_, source_stride_, references, reference_stride_, results); + } +}; + +class SADTest : public ::testing::WithParamInterface<SadMxNParam>, + public SADTestBase { + public: + SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {} + + protected: + unsigned int SAD(int block_idx) { + unsigned int ret; + const uint8_t *const reference = GetReference(block_idx); + + API_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_, + reference, reference_stride_)); + return ret; + } + + void CheckSAD() { + const unsigned int reference_sad = ReferenceSAD(0); + const unsigned int exp_sad = SAD(0); + + ASSERT_EQ(reference_sad, exp_sad); + } + + void SADForSpeedTest(unsigned int *results, + const uint8_t *const *references) override { + GET_PARAM(2) + (source_data_, source_stride_, references[0], reference_stride_); + (void)results; + } +}; + +class SADSkipTest : public ::testing::WithParamInterface<SadMxNParam>, + public SADTestBase { + public: + SADSkipTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {} + + protected: + unsigned int SAD(int 
block_idx) { + unsigned int ret; + const uint8_t *const reference = GetReference(block_idx); + + API_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_, + reference, reference_stride_)); + return ret; + } + + void CheckSAD() { + const unsigned int reference_sad = ReferenceSADSkip(0); + const unsigned int exp_sad = SAD(0); + + ASSERT_EQ(reference_sad, exp_sad); + } + + void SADForSpeedTest(unsigned int *results, + const uint8_t *const *references) override { + GET_PARAM(2) + (source_data_, source_stride_, references[0], reference_stride_); + (void)results; + } +}; + +class SADavgTest : public ::testing::WithParamInterface<SadMxNAvgParam>, + public SADTestBase { + public: + SADavgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {} + + protected: + unsigned int SAD_avg(int block_idx) { + unsigned int ret; + const uint8_t *const reference = GetReference(block_idx); + + API_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_, + reference, reference_stride_, + second_pred_)); + return ret; + } + + void CheckSAD() { + const unsigned int reference_sad = ReferenceSADavg(0); + const unsigned int exp_sad = SAD_avg(0); + + ASSERT_EQ(reference_sad, exp_sad); + } +}; + +class DistWtdSADavgTest + : public ::testing::WithParamInterface<DistWtdSadMxNAvgParam>, + public SADTestBase { + public: + DistWtdSADavgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {} + + protected: + unsigned int dist_wtd_SAD_avg(int block_idx) { + unsigned int ret; + const uint8_t *const reference = GetReference(block_idx); + + API_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_, + reference, reference_stride_, + second_pred_, &jcp_param_)); + return ret; + } + + void CheckSAD() { + for (int j = 0; j < 2; ++j) { + for (int i = 0; i < 4; ++i) { + jcp_param_.fwd_offset = quant_dist_lookup_table[i][j]; + jcp_param_.bck_offset = quant_dist_lookup_table[i][1 - j]; + + const unsigned int reference_sad = ReferenceDistWtdSADavg(0); 
+ const unsigned int exp_sad = dist_wtd_SAD_avg(0); + + ASSERT_EQ(reference_sad, exp_sad); + } + } + } +}; + +uint8_t *SADTestBase::source_data_ = nullptr; +uint8_t *SADTestBase::reference_data_ = nullptr; +uint8_t *SADTestBase::second_pred_ = nullptr; +uint8_t *SADTestBase::comp_pred_ = nullptr; +uint8_t *SADTestBase::comp_pred_test_ = nullptr; +uint8_t *SADTestBase::source_data8_ = nullptr; +uint8_t *SADTestBase::reference_data8_ = nullptr; +uint8_t *SADTestBase::second_pred8_ = nullptr; +uint8_t *SADTestBase::comp_pred8_ = nullptr; +uint8_t *SADTestBase::comp_pred8_test_ = nullptr; +uint16_t *SADTestBase::source_data16_ = nullptr; +uint16_t *SADTestBase::reference_data16_ = nullptr; +uint16_t *SADTestBase::second_pred16_ = nullptr; +uint16_t *SADTestBase::comp_pred16_ = nullptr; +uint16_t *SADTestBase::comp_pred16_test_ = nullptr; + +TEST_P(SADTest, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(reference_data_, reference_stride_, mask_); + CheckSAD(); +} + +TEST_P(SADTest, MaxSrc) { + FillConstant(source_data_, source_stride_, mask_); + FillConstant(reference_data_, reference_stride_, 0); + CheckSAD(); +} + +TEST_P(SADTest, ShortRef) { + const int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADTest, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. 
+ const int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADTest, ShortSrc) { + const int tmp_stride = source_stride_; + source_stride_ >>= 1; + int test_count = 2000; + while (test_count > 0) { + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + if (testing::Test::HasFatalFailure()) break; + test_count -= 1; + } + source_stride_ = tmp_stride; +} + +TEST_P(SADTest, DISABLED_Speed) { + const int tmp_stride = source_stride_; + source_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + SpeedSAD(); + source_stride_ = tmp_stride; +} + +TEST_P(SADSkipTest, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(reference_data_, reference_stride_, mask_); + CheckSAD(); +} + +TEST_P(SADSkipTest, MaxSrc) { + FillConstant(source_data_, source_stride_, mask_); + FillConstant(reference_data_, reference_stride_, 0); + CheckSAD(); +} + +TEST_P(SADSkipTest, ShortRef) { + const int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADSkipTest, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. 
+ const int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADSkipTest, ShortSrc) { + const int tmp_stride = source_stride_; + source_stride_ >>= 1; + int test_count = 2000; + while (test_count > 0) { + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + CheckSAD(); + if (testing::Test::HasFatalFailure()) break; + test_count -= 1; + } + source_stride_ = tmp_stride; +} + +TEST_P(SADSkipTest, DISABLED_Speed) { + const int tmp_stride = source_stride_; + source_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + SpeedSAD(); + source_stride_ = tmp_stride; +} + +TEST_P(SADavgTest, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(reference_data_, reference_stride_, mask_); + FillConstant(second_pred_, width_, 0); + CheckSAD(); +} +TEST_P(SADavgTest, MaxSrc) { + FillConstant(source_data_, source_stride_, mask_); + FillConstant(reference_data_, reference_stride_, 0); + FillConstant(second_pred_, width_, 0); + CheckSAD(); +} + +TEST_P(SADavgTest, ShortRef) { + const int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + FillRandom(second_pred_, width_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADavgTest, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. 
+ const int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + FillRandom(second_pred_, width_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADavgTest, ShortSrc) { + const int tmp_stride = source_stride_; + source_stride_ >>= 1; + int test_count = 2000; + while (test_count > 0) { + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + FillRandom(second_pred_, width_); + CheckSAD(); + if (testing::Test::HasFatalFailure()) break; + test_count -= 1; + } + source_stride_ = tmp_stride; +} + +TEST_P(DistWtdSADavgTest, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(reference_data_, reference_stride_, mask_); + FillConstant(second_pred_, width_, 0); + CheckSAD(); +} +TEST_P(DistWtdSADavgTest, MaxSrc) { + FillConstant(source_data_, source_stride_, mask_); + FillConstant(reference_data_, reference_stride_, 0); + FillConstant(second_pred_, width_, 0); + CheckSAD(); +} + +TEST_P(DistWtdSADavgTest, ShortRef) { + const int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + FillRandom(second_pred_, width_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(DistWtdSADavgTest, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. 
+ const int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + FillRandom(second_pred_, width_); + CheckSAD(); + reference_stride_ = tmp_stride; +} + +TEST_P(DistWtdSADavgTest, ShortSrc) { + const int tmp_stride = source_stride_; + source_stride_ >>= 1; + int test_count = 2000; + while (test_count > 0) { + FillRandom(source_data_, source_stride_); + FillRandom(reference_data_, reference_stride_); + FillRandom(second_pred_, width_); + CheckSAD(); + if (testing::Test::HasFatalFailure()) break; + test_count -= 1; + } + source_stride_ = tmp_stride; +} + +// SADx4 +TEST_P(SADx4Test, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(GetReference(0), reference_stride_, mask_); + FillConstant(GetReference(1), reference_stride_, mask_); + FillConstant(GetReference(2), reference_stride_, mask_); + FillConstant(GetReference(3), reference_stride_, mask_); + CheckSADs(); +} + +TEST_P(SADx4Test, MaxSrc) { + FillConstant(source_data_, source_stride_, mask_); + FillConstant(GetReference(0), reference_stride_, 0); + FillConstant(GetReference(1), reference_stride_, 0); + FillConstant(GetReference(2), reference_stride_, 0); + FillConstant(GetReference(3), reference_stride_, 0); + CheckSADs(); +} + +TEST_P(SADx4Test, ShortRef) { + int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADx4Test, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. 
+ int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADx4Test, ShortSrc) { + int tmp_stride = source_stride_; + source_stride_ >>= 1; + int test_count = 1000; + while (test_count > 0) { + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + test_count -= 1; + } + source_stride_ = tmp_stride; +} + +TEST_P(SADx4Test, SrcAlignedByWidth) { + uint8_t *tmp_source_data = source_data_; + source_data_ += width_; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + source_data_ = tmp_source_data; +} + +TEST_P(SADx4Test, DISABLED_Speed) { + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + SpeedSAD(); +} + +// SADx3 +TEST_P(SADx3Test, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(GetReference(0), reference_stride_, mask_); + FillConstant(GetReference(1), reference_stride_, mask_); + FillConstant(GetReference(2), reference_stride_, mask_); + FillConstant(GetReference(3), reference_stride_, mask_); + CheckSADs(); +} + +TEST_P(SADx3Test, MaxSrc) { + FillConstant(source_data_, source_stride_, mask_); + FillConstant(GetReference(0), 
reference_stride_, 0); + FillConstant(GetReference(1), reference_stride_, 0); + FillConstant(GetReference(2), reference_stride_, 0); + FillConstant(GetReference(3), reference_stride_, 0); + CheckSADs(); +} + +TEST_P(SADx3Test, ShortRef) { + int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADx3Test, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. + int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADx3Test, ShortSrc) { + int tmp_stride = source_stride_; + source_stride_ >>= 1; + int test_count = 1000; + while (test_count > 0) { + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + test_count -= 1; + } + source_stride_ = tmp_stride; +} + +TEST_P(SADx3Test, SrcAlignedByWidth) { + uint8_t *tmp_source_data = source_data_; + source_data_ += width_; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + source_data_ = tmp_source_data; +} + +TEST_P(SADx3Test, 
DISABLED_Speed) { + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + SpeedSAD(); +} + +// SADSkipx4 +TEST_P(SADSkipx4Test, MaxRef) { + FillConstant(source_data_, source_stride_, 0); + FillConstant(GetReference(0), reference_stride_, mask_); + FillConstant(GetReference(1), reference_stride_, mask_); + FillConstant(GetReference(2), reference_stride_, mask_); + FillConstant(GetReference(3), reference_stride_, mask_); + CheckSADs(); +} + +TEST_P(SADSkipx4Test, MaxSrc) { + FillConstant(source_data_, source_stride_, mask_); + FillConstant(GetReference(0), reference_stride_, 0); + FillConstant(GetReference(1), reference_stride_, 0); + FillConstant(GetReference(2), reference_stride_, 0); + FillConstant(GetReference(3), reference_stride_, 0); + CheckSADs(); +} + +TEST_P(SADSkipx4Test, ShortRef) { + int tmp_stride = reference_stride_; + reference_stride_ >>= 1; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADSkipx4Test, UnalignedRef) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. 
+ int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + reference_stride_ = tmp_stride; +} + +TEST_P(SADSkipx4Test, ShortSrc) { + int tmp_stride = source_stride_; + source_stride_ >>= 1; + int test_count = 1000; + while (test_count > 0) { + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + test_count -= 1; + } + source_stride_ = tmp_stride; +} + +TEST_P(SADSkipx4Test, SrcAlignedByWidth) { + uint8_t *tmp_source_data = source_data_; + source_data_ += width_; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + CheckSADs(); + source_data_ = tmp_source_data; +} + +TEST_P(SADSkipx4Test, DISABLED_Speed) { + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + SpeedSAD(); +} + +using std::make_tuple; + +//------------------------------------------------------------------------------ +// C functions +const SadMxNParam c_tests[] = { + make_tuple(128, 128, &aom_sad128x128_c, -1), + make_tuple(128, 64, &aom_sad128x64_c, -1), + make_tuple(64, 128, &aom_sad64x128_c, -1), + make_tuple(64, 64, &aom_sad64x64_c, -1), + make_tuple(64, 32, &aom_sad64x32_c, -1), + make_tuple(32, 64, &aom_sad32x64_c, -1), + make_tuple(32, 32, 
&aom_sad32x32_c, -1), + make_tuple(32, 16, &aom_sad32x16_c, -1), + make_tuple(16, 32, &aom_sad16x32_c, -1), + make_tuple(16, 16, &aom_sad16x16_c, -1), + make_tuple(16, 8, &aom_sad16x8_c, -1), + make_tuple(8, 16, &aom_sad8x16_c, -1), + make_tuple(8, 8, &aom_sad8x8_c, -1), + make_tuple(8, 4, &aom_sad8x4_c, -1), + make_tuple(4, 8, &aom_sad4x8_c, -1), + make_tuple(4, 4, &aom_sad4x4_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128_c, 8), + make_tuple(128, 64, &aom_highbd_sad128x64_c, 8), + make_tuple(64, 128, &aom_highbd_sad64x128_c, 8), + make_tuple(64, 64, &aom_highbd_sad64x64_c, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_c, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_c, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_c, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_c, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_c, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_c, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_c, 8), + make_tuple(8, 16, &aom_highbd_sad8x16_c, 8), + make_tuple(8, 8, &aom_highbd_sad8x8_c, 8), + make_tuple(8, 4, &aom_highbd_sad8x4_c, 8), + make_tuple(4, 8, &aom_highbd_sad4x8_c, 8), + make_tuple(4, 4, &aom_highbd_sad4x4_c, 8), + make_tuple(128, 128, &aom_highbd_sad128x128_c, 10), + make_tuple(128, 64, &aom_highbd_sad128x64_c, 10), + make_tuple(64, 128, &aom_highbd_sad64x128_c, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_c, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_c, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_c, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_c, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_c, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_c, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_c, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_c, 10), + make_tuple(8, 16, &aom_highbd_sad8x16_c, 10), + make_tuple(8, 8, &aom_highbd_sad8x8_c, 10), + make_tuple(8, 4, &aom_highbd_sad8x4_c, 10), + make_tuple(4, 8, &aom_highbd_sad4x8_c, 10), + make_tuple(4, 4, &aom_highbd_sad4x4_c, 10), + make_tuple(128, 
128, &aom_highbd_sad128x128_c, 12), + make_tuple(128, 64, &aom_highbd_sad128x64_c, 12), + make_tuple(64, 128, &aom_highbd_sad64x128_c, 12), + make_tuple(64, 64, &aom_highbd_sad64x64_c, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_c, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_c, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_c, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_c, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_c, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_c, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_c, 12), + make_tuple(8, 16, &aom_highbd_sad8x16_c, 12), + make_tuple(8, 8, &aom_highbd_sad8x8_c, 12), + make_tuple(8, 4, &aom_highbd_sad8x4_c, 12), + make_tuple(4, 8, &aom_highbd_sad4x8_c, 12), + make_tuple(4, 4, &aom_highbd_sad4x4_c, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16_c, -1), + make_tuple(16, 64, &aom_sad16x64_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad64x16_c, 8), + make_tuple(16, 64, &aom_highbd_sad16x64_c, 8), + make_tuple(64, 16, &aom_highbd_sad64x16_c, 10), + make_tuple(16, 64, &aom_highbd_sad16x64_c, 10), + make_tuple(64, 16, &aom_highbd_sad64x16_c, 12), + make_tuple(16, 64, &aom_highbd_sad16x64_c, 12), +#endif + make_tuple(32, 8, &aom_sad32x8_c, -1), + make_tuple(8, 32, &aom_sad8x32_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(32, 8, &aom_highbd_sad32x8_c, 8), + make_tuple(8, 32, &aom_highbd_sad8x32_c, 8), + make_tuple(32, 8, &aom_highbd_sad32x8_c, 10), + make_tuple(8, 32, &aom_highbd_sad8x32_c, 10), + make_tuple(32, 8, &aom_highbd_sad32x8_c, 12), + make_tuple(8, 32, &aom_highbd_sad8x32_c, 12), +#endif + make_tuple(16, 4, &aom_sad16x4_c, -1), + make_tuple(4, 16, &aom_sad4x16_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(16, 4, &aom_highbd_sad16x4_c, 8), + make_tuple(4, 16, &aom_highbd_sad4x16_c, 8), + make_tuple(16, 4, &aom_highbd_sad16x4_c, 10), + make_tuple(4, 16, &aom_highbd_sad4x16_c, 10), + make_tuple(16, 4, 
&aom_highbd_sad16x4_c, 12), + make_tuple(4, 16, &aom_highbd_sad4x16_c, 12), +#endif +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(C, SADTest, ::testing::ValuesIn(c_tests)); + +const SadSkipMxNParam skip_c_tests[] = { + make_tuple(128, 128, &aom_sad_skip_128x128_c, -1), + make_tuple(128, 64, &aom_sad_skip_128x64_c, -1), + make_tuple(64, 128, &aom_sad_skip_64x128_c, -1), + make_tuple(64, 64, &aom_sad_skip_64x64_c, -1), + make_tuple(64, 32, &aom_sad_skip_64x32_c, -1), + make_tuple(32, 64, &aom_sad_skip_32x64_c, -1), + make_tuple(32, 32, &aom_sad_skip_32x32_c, -1), + make_tuple(32, 16, &aom_sad_skip_32x16_c, -1), + make_tuple(16, 32, &aom_sad_skip_16x32_c, -1), + make_tuple(16, 16, &aom_sad_skip_16x16_c, -1), + make_tuple(16, 8, &aom_sad_skip_16x8_c, -1), + make_tuple(8, 16, &aom_sad_skip_8x16_c, -1), + make_tuple(8, 8, &aom_sad_skip_8x8_c, -1), + make_tuple(8, 4, &aom_sad_skip_8x4_c, -1), + make_tuple(4, 8, &aom_sad_skip_4x8_c, -1), + make_tuple(4, 4, &aom_sad_skip_4x4_c, -1), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad_skip_64x16_c, -1), + make_tuple(16, 64, &aom_sad_skip_16x64_c, -1), + make_tuple(32, 8, &aom_sad_skip_32x8_c, -1), + make_tuple(8, 32, &aom_sad_skip_8x32_c, -1), + make_tuple(16, 4, &aom_sad_skip_16x4_c, -1), + make_tuple(4, 16, &aom_sad_skip_4x16_c, -1), +#endif +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad_skip_128x128_c, 8), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64_c, 8), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128_c, 8), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_c, 8), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_c, 8), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64_c, 8), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32_c, 8), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_c, 8), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32_c, 8), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_c, 8), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_c, 8), + make_tuple(8, 16, 
&aom_highbd_sad_skip_8x16_c, 8), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8_c, 8), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4_c, 8), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8_c, 8), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4_c, 8), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16_c, 8), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_c, 8), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8_c, 8), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32_c, 8), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4_c, 8), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16_c, 8), +#endif + make_tuple(128, 128, &aom_highbd_sad_skip_128x128_c, 10), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64_c, 10), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128_c, 10), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_c, 10), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_c, 10), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64_c, 10), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32_c, 10), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_c, 10), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32_c, 10), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_c, 10), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_c, 10), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16_c, 10), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8_c, 10), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4_c, 10), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8_c, 10), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4_c, 10), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16_c, 10), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_c, 10), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8_c, 10), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32_c, 10), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4_c, 10), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16_c, 10), +#endif + make_tuple(128, 128, &aom_highbd_sad_skip_128x128_c, 12), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64_c, 12), + make_tuple(64, 128, 
&aom_highbd_sad_skip_64x128_c, 12), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_c, 12), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_c, 12), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64_c, 12), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32_c, 12), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_c, 12), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32_c, 12), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_c, 12), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_c, 12), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16_c, 12), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8_c, 12), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4_c, 12), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8_c, 12), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4_c, 12), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16_c, 12), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_c, 12), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8_c, 12), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32_c, 12), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4_c, 12), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16_c, 12), +#endif // !CONFIG_REALTIME_ONLY +#endif // CONFIG_AV1_HIGHBITDEPTH +}; +INSTANTIATE_TEST_SUITE_P(C, SADSkipTest, ::testing::ValuesIn(skip_c_tests)); + +const SadMxNAvgParam avg_c_tests[] = { + make_tuple(128, 128, &aom_sad128x128_avg_c, -1), + make_tuple(128, 64, &aom_sad128x64_avg_c, -1), + make_tuple(64, 128, &aom_sad64x128_avg_c, -1), + make_tuple(64, 64, &aom_sad64x64_avg_c, -1), + make_tuple(64, 32, &aom_sad64x32_avg_c, -1), + make_tuple(32, 64, &aom_sad32x64_avg_c, -1), + make_tuple(32, 32, &aom_sad32x32_avg_c, -1), + make_tuple(32, 16, &aom_sad32x16_avg_c, -1), + make_tuple(16, 32, &aom_sad16x32_avg_c, -1), + make_tuple(16, 16, &aom_sad16x16_avg_c, -1), + make_tuple(16, 8, &aom_sad16x8_avg_c, -1), + make_tuple(8, 16, &aom_sad8x16_avg_c, -1), + make_tuple(8, 8, &aom_sad8x8_avg_c, -1), + make_tuple(8, 4, &aom_sad8x4_avg_c, -1), + make_tuple(4, 8, &aom_sad4x8_avg_c, -1), + 
make_tuple(4, 4, &aom_sad4x4_avg_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 8), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 8), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 8), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 8), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 8), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 8), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 8), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 8), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 8), + make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 10), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 10), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 10), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 10), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 10), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 10), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 10), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 10), + make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 12), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 12), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 12), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 12), + make_tuple(64, 
32, &aom_highbd_sad64x32_avg_c, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 12), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 12), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 12), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 12), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 12), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16_avg_c, -1), + make_tuple(16, 64, &aom_sad16x64_avg_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad64x16_avg_c, 8), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_c, 8), + make_tuple(64, 16, &aom_highbd_sad64x16_avg_c, 10), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_c, 10), + make_tuple(64, 16, &aom_highbd_sad64x16_avg_c, 12), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_c, 12), +#endif + make_tuple(32, 8, &aom_sad32x8_avg_c, -1), + make_tuple(8, 32, &aom_sad8x32_avg_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(32, 8, &aom_highbd_sad32x8_avg_c, 8), + make_tuple(8, 32, &aom_highbd_sad8x32_avg_c, 8), + make_tuple(32, 8, &aom_highbd_sad32x8_avg_c, 10), + make_tuple(8, 32, &aom_highbd_sad8x32_avg_c, 10), + make_tuple(32, 8, &aom_highbd_sad32x8_avg_c, 12), + make_tuple(8, 32, &aom_highbd_sad8x32_avg_c, 12), +#endif + make_tuple(16, 4, &aom_sad16x4_avg_c, -1), + make_tuple(4, 16, &aom_sad4x16_avg_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(16, 4, &aom_highbd_sad16x4_avg_c, 8), + make_tuple(4, 16, &aom_highbd_sad4x16_avg_c, 8), + make_tuple(16, 4, &aom_highbd_sad16x4_avg_c, 10), + make_tuple(4, 16, &aom_highbd_sad4x16_avg_c, 10), + make_tuple(16, 4, &aom_highbd_sad16x4_avg_c, 12), + make_tuple(4, 16, &aom_highbd_sad4x16_avg_c, 12), +#endif 
+#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests)); + +const DistWtdSadMxNAvgParam dist_wtd_avg_c_tests[] = { + make_tuple(128, 128, &aom_dist_wtd_sad128x128_avg_c, -1), + make_tuple(128, 64, &aom_dist_wtd_sad128x64_avg_c, -1), + make_tuple(64, 128, &aom_dist_wtd_sad64x128_avg_c, -1), + make_tuple(64, 64, &aom_dist_wtd_sad64x64_avg_c, -1), + make_tuple(64, 32, &aom_dist_wtd_sad64x32_avg_c, -1), + make_tuple(32, 64, &aom_dist_wtd_sad32x64_avg_c, -1), + make_tuple(32, 32, &aom_dist_wtd_sad32x32_avg_c, -1), + make_tuple(32, 16, &aom_dist_wtd_sad32x16_avg_c, -1), + make_tuple(16, 32, &aom_dist_wtd_sad16x32_avg_c, -1), + make_tuple(16, 16, &aom_dist_wtd_sad16x16_avg_c, -1), + make_tuple(16, 8, &aom_dist_wtd_sad16x8_avg_c, -1), + make_tuple(8, 16, &aom_dist_wtd_sad8x16_avg_c, -1), + make_tuple(8, 8, &aom_dist_wtd_sad8x8_avg_c, -1), + make_tuple(8, 4, &aom_dist_wtd_sad8x4_avg_c, -1), + make_tuple(4, 8, &aom_dist_wtd_sad4x8_avg_c, -1), + make_tuple(4, 4, &aom_dist_wtd_sad4x4_avg_c, -1), + +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_dist_wtd_sad64x16_avg_c, -1), + make_tuple(16, 64, &aom_dist_wtd_sad16x64_avg_c, -1), + make_tuple(32, 8, &aom_dist_wtd_sad32x8_avg_c, -1), + make_tuple(8, 32, &aom_dist_wtd_sad8x32_avg_c, -1), + make_tuple(16, 4, &aom_dist_wtd_sad16x4_avg_c, -1), + make_tuple(4, 16, &aom_dist_wtd_sad4x16_avg_c, -1), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(C, DistWtdSADavgTest, + ::testing::ValuesIn(dist_wtd_avg_c_tests)); + +const SadMxNx4Param x4d_c_tests[] = { + make_tuple(128, 128, &aom_sad128x128x4d_c, -1), + make_tuple(128, 64, &aom_sad128x64x4d_c, -1), + make_tuple(64, 128, &aom_sad64x128x4d_c, -1), + make_tuple(64, 64, &aom_sad64x64x4d_c, -1), + make_tuple(64, 32, &aom_sad64x32x4d_c, -1), + make_tuple(32, 64, &aom_sad32x64x4d_c, -1), + make_tuple(32, 32, &aom_sad32x32x4d_c, -1), + make_tuple(32, 16, &aom_sad32x16x4d_c, -1), + make_tuple(16, 32, &aom_sad16x32x4d_c, -1), + make_tuple(16, 
16, &aom_sad16x16x4d_c, -1), + make_tuple(16, 8, &aom_sad16x8x4d_c, -1), + make_tuple(8, 16, &aom_sad8x16x4d_c, -1), + make_tuple(8, 8, &aom_sad8x8x4d_c, -1), + make_tuple(8, 4, &aom_sad8x4x4d_c, -1), + make_tuple(4, 8, &aom_sad4x8x4d_c, -1), + make_tuple(4, 4, &aom_sad4x4x4d_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 8), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 8), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 8), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 8), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 8), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 8), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 8), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 8), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 8), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 8), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 8), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 8), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 8), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 8), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 8), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 8), + make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 10), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 10), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 10), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 10), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 10), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 10), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 10), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 10), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 10), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 10), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 10), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 10), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 10), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 10), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 10), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 10), + make_tuple(128, 128, 
&aom_highbd_sad128x128x4d_c, 12), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 12), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 12), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 12), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 12), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 12), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 12), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 12), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 12), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 12), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 12), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 12), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 12), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 12), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 12), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 12), +#endif +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16x4d_c, -1), + make_tuple(16, 64, &aom_sad16x64x4d_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad64x16x4d_c, 8), + make_tuple(16, 64, &aom_highbd_sad16x64x4d_c, 8), + make_tuple(64, 16, &aom_highbd_sad64x16x4d_c, 10), + make_tuple(16, 64, &aom_highbd_sad16x64x4d_c, 10), + make_tuple(64, 16, &aom_highbd_sad64x16x4d_c, 12), + make_tuple(16, 64, &aom_highbd_sad16x64x4d_c, 12), +#endif + make_tuple(32, 8, &aom_sad32x8x4d_c, -1), + make_tuple(8, 32, &aom_sad8x32x4d_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(32, 8, &aom_highbd_sad32x8x4d_c, 8), + make_tuple(8, 32, &aom_highbd_sad8x32x4d_c, 8), + make_tuple(32, 8, &aom_highbd_sad32x8x4d_c, 10), + make_tuple(8, 32, &aom_highbd_sad8x32x4d_c, 10), + make_tuple(32, 8, &aom_highbd_sad32x8x4d_c, 12), + make_tuple(8, 32, &aom_highbd_sad8x32x4d_c, 12), +#endif + make_tuple(16, 4, &aom_sad16x4x4d_c, -1), + make_tuple(4, 16, &aom_sad4x16x4d_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(16, 4, &aom_highbd_sad16x4x4d_c, 8), + make_tuple(4, 16, &aom_highbd_sad4x16x4d_c, 8), + make_tuple(16, 4, &aom_highbd_sad16x4x4d_c, 10), + 
make_tuple(4, 16, &aom_highbd_sad4x16x4d_c, 10), + make_tuple(16, 4, &aom_highbd_sad16x4x4d_c, 12), + make_tuple(4, 16, &aom_highbd_sad4x16x4d_c, 12), +#endif +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests)); + +const SadMxNx4Param x3d_c_tests[] = { + make_tuple(128, 128, &aom_sad128x128x3d_c, -1), + make_tuple(128, 64, &aom_sad128x64x3d_c, -1), + make_tuple(64, 128, &aom_sad64x128x3d_c, -1), + make_tuple(64, 64, &aom_sad64x64x3d_c, -1), + make_tuple(64, 32, &aom_sad64x32x3d_c, -1), + make_tuple(32, 64, &aom_sad32x64x3d_c, -1), + make_tuple(32, 32, &aom_sad32x32x3d_c, -1), + make_tuple(32, 16, &aom_sad32x16x3d_c, -1), + make_tuple(16, 32, &aom_sad16x32x3d_c, -1), + make_tuple(16, 16, &aom_sad16x16x3d_c, -1), + make_tuple(16, 8, &aom_sad16x8x3d_c, -1), + make_tuple(8, 16, &aom_sad8x16x3d_c, -1), + make_tuple(8, 8, &aom_sad8x8x3d_c, -1), + make_tuple(8, 4, &aom_sad8x4x3d_c, -1), + make_tuple(4, 8, &aom_sad4x8x3d_c, -1), + make_tuple(4, 4, &aom_sad4x4x3d_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128x3d_c, 8), + make_tuple(128, 64, &aom_highbd_sad128x64x3d_c, 8), + make_tuple(64, 128, &aom_highbd_sad64x128x3d_c, 8), + make_tuple(64, 64, &aom_highbd_sad64x64x3d_c, 8), + make_tuple(64, 32, &aom_highbd_sad64x32x3d_c, 8), + make_tuple(32, 64, &aom_highbd_sad32x64x3d_c, 8), + make_tuple(32, 32, &aom_highbd_sad32x32x3d_c, 8), + make_tuple(32, 16, &aom_highbd_sad32x16x3d_c, 8), + make_tuple(16, 32, &aom_highbd_sad16x32x3d_c, 8), + make_tuple(16, 16, &aom_highbd_sad16x16x3d_c, 8), + make_tuple(16, 8, &aom_highbd_sad16x8x3d_c, 8), + make_tuple(8, 16, &aom_highbd_sad8x16x3d_c, 8), + make_tuple(8, 8, &aom_highbd_sad8x8x3d_c, 8), + make_tuple(8, 4, &aom_highbd_sad8x4x3d_c, 8), + make_tuple(4, 8, &aom_highbd_sad4x8x3d_c, 8), + make_tuple(4, 4, &aom_highbd_sad4x4x3d_c, 8), + make_tuple(128, 128, &aom_highbd_sad128x128x3d_c, 10), + make_tuple(128, 64, &aom_highbd_sad128x64x3d_c, 10), + 
make_tuple(64, 128, &aom_highbd_sad64x128x3d_c, 10), + make_tuple(64, 64, &aom_highbd_sad64x64x3d_c, 10), + make_tuple(64, 32, &aom_highbd_sad64x32x3d_c, 10), + make_tuple(32, 64, &aom_highbd_sad32x64x3d_c, 10), + make_tuple(32, 32, &aom_highbd_sad32x32x3d_c, 10), + make_tuple(32, 16, &aom_highbd_sad32x16x3d_c, 10), + make_tuple(16, 32, &aom_highbd_sad16x32x3d_c, 10), + make_tuple(16, 16, &aom_highbd_sad16x16x3d_c, 10), + make_tuple(16, 8, &aom_highbd_sad16x8x3d_c, 10), + make_tuple(8, 16, &aom_highbd_sad8x16x3d_c, 10), + make_tuple(8, 8, &aom_highbd_sad8x8x3d_c, 10), + make_tuple(8, 4, &aom_highbd_sad8x4x3d_c, 10), + make_tuple(4, 8, &aom_highbd_sad4x8x3d_c, 10), + make_tuple(4, 4, &aom_highbd_sad4x4x3d_c, 10), + make_tuple(128, 128, &aom_highbd_sad128x128x3d_c, 12), + make_tuple(128, 64, &aom_highbd_sad128x64x3d_c, 12), + make_tuple(64, 128, &aom_highbd_sad64x128x3d_c, 12), + make_tuple(64, 64, &aom_highbd_sad64x64x3d_c, 12), + make_tuple(64, 32, &aom_highbd_sad64x32x3d_c, 12), + make_tuple(32, 64, &aom_highbd_sad32x64x3d_c, 12), + make_tuple(32, 32, &aom_highbd_sad32x32x3d_c, 12), + make_tuple(32, 16, &aom_highbd_sad32x16x3d_c, 12), + make_tuple(16, 32, &aom_highbd_sad16x32x3d_c, 12), + make_tuple(16, 16, &aom_highbd_sad16x16x3d_c, 12), + make_tuple(16, 8, &aom_highbd_sad16x8x3d_c, 12), + make_tuple(8, 16, &aom_highbd_sad8x16x3d_c, 12), + make_tuple(8, 8, &aom_highbd_sad8x8x3d_c, 12), + make_tuple(8, 4, &aom_highbd_sad8x4x3d_c, 12), + make_tuple(4, 8, &aom_highbd_sad4x8x3d_c, 12), + make_tuple(4, 4, &aom_highbd_sad4x4x3d_c, 12), +#endif +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16x3d_c, -1), + make_tuple(16, 64, &aom_sad16x64x3d_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad64x16x3d_c, 8), + make_tuple(16, 64, &aom_highbd_sad16x64x3d_c, 8), + make_tuple(64, 16, &aom_highbd_sad64x16x3d_c, 10), + make_tuple(16, 64, &aom_highbd_sad16x64x3d_c, 10), + make_tuple(64, 16, &aom_highbd_sad64x16x3d_c, 12), + make_tuple(16, 64, 
&aom_highbd_sad16x64x3d_c, 12), +#endif + make_tuple(32, 8, &aom_sad32x8x3d_c, -1), + make_tuple(8, 32, &aom_sad8x32x3d_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(32, 8, &aom_highbd_sad32x8x3d_c, 8), + make_tuple(8, 32, &aom_highbd_sad8x32x3d_c, 8), + make_tuple(32, 8, &aom_highbd_sad32x8x3d_c, 10), + make_tuple(8, 32, &aom_highbd_sad8x32x3d_c, 10), + make_tuple(32, 8, &aom_highbd_sad32x8x3d_c, 12), + make_tuple(8, 32, &aom_highbd_sad8x32x3d_c, 12), +#endif + make_tuple(16, 4, &aom_sad16x4x3d_c, -1), + make_tuple(4, 16, &aom_sad4x16x3d_c, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(16, 4, &aom_highbd_sad16x4x3d_c, 8), + make_tuple(4, 16, &aom_highbd_sad4x16x3d_c, 8), + make_tuple(16, 4, &aom_highbd_sad16x4x3d_c, 10), + make_tuple(4, 16, &aom_highbd_sad4x16x3d_c, 10), + make_tuple(16, 4, &aom_highbd_sad16x4x3d_c, 12), + make_tuple(4, 16, &aom_highbd_sad4x16x3d_c, 12), +#endif +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(C, SADx3Test, ::testing::ValuesIn(x3d_c_tests)); + +const SadMxNx4Param skip_x4d_c_tests[] = { + make_tuple(128, 128, &aom_sad_skip_128x128x4d_c, -1), + make_tuple(128, 64, &aom_sad_skip_128x64x4d_c, -1), + make_tuple(64, 128, &aom_sad_skip_64x128x4d_c, -1), + make_tuple(64, 64, &aom_sad_skip_64x64x4d_c, -1), + make_tuple(64, 32, &aom_sad_skip_64x32x4d_c, -1), + make_tuple(32, 64, &aom_sad_skip_32x64x4d_c, -1), + make_tuple(32, 32, &aom_sad_skip_32x32x4d_c, -1), + make_tuple(32, 16, &aom_sad_skip_32x16x4d_c, -1), + make_tuple(16, 32, &aom_sad_skip_16x32x4d_c, -1), + make_tuple(16, 16, &aom_sad_skip_16x16x4d_c, -1), + make_tuple(16, 8, &aom_sad_skip_16x8x4d_c, -1), + make_tuple(8, 16, &aom_sad_skip_8x16x4d_c, -1), + make_tuple(8, 8, &aom_sad_skip_8x8x4d_c, -1), + make_tuple(4, 8, &aom_sad_skip_4x8x4d_c, -1), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad_skip_64x16x4d_c, -1), + make_tuple(16, 64, &aom_sad_skip_16x64x4d_c, -1), + make_tuple(32, 8, &aom_sad_skip_32x8x4d_c, -1), + make_tuple(8, 32, 
&aom_sad_skip_8x32x4d_c, -1), + make_tuple(4, 16, &aom_sad_skip_4x16x4d_c, -1), +#endif +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_c, 8), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_c, 8), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_c, 8), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_c, 8), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_c, 8), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_c, 8), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_c, 8), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_c, 8), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_c, 8), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_c, 8), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_c, 8), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_c, 8), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_c, 8), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_c, 8), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_c, 8), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_c, 8), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_c, 8), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_c, 8), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_c, 8), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_c, 8), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_c, 8), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_c, 8), +#endif + + make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_c, 10), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_c, 10), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_c, 10), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_c, 10), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_c, 10), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_c, 10), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_c, 10), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_c, 10), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_c, 10), + make_tuple(16, 16, 
&aom_highbd_sad_skip_16x16x4d_c, 10), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_c, 10), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_c, 10), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_c, 10), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_c, 10), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_c, 10), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_c, 10), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_c, 10), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_c, 10), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_c, 10), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_c, 10), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_c, 10), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_c, 10), +#endif + + make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_c, 12), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_c, 12), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_c, 12), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_c, 12), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_c, 12), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_c, 12), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_c, 12), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_c, 12), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_c, 12), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_c, 12), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_c, 12), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_c, 12), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_c, 12), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_c, 12), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_c, 12), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_c, 12), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_c, 12), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_c, 12), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_c, 12), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_c, 12), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_c, 
12), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_c, 12), +#endif +#endif // CONFIG_AV1_HIGHBITDEPTH +}; +INSTANTIATE_TEST_SUITE_P(C, SADSkipx4Test, + ::testing::ValuesIn(skip_x4d_c_tests)); + +//------------------------------------------------------------------------------ +// ARM functions +#if HAVE_NEON +const SadMxNParam neon_tests[] = { + make_tuple(128, 128, &aom_sad128x128_neon, -1), + make_tuple(128, 64, &aom_sad128x64_neon, -1), + make_tuple(64, 128, &aom_sad64x128_neon, -1), + make_tuple(64, 64, &aom_sad64x64_neon, -1), + make_tuple(64, 32, &aom_sad64x32_neon, -1), + make_tuple(32, 64, &aom_sad32x64_neon, -1), + make_tuple(32, 32, &aom_sad32x32_neon, -1), + make_tuple(32, 16, &aom_sad32x16_neon, -1), + make_tuple(16, 32, &aom_sad16x32_neon, -1), + make_tuple(16, 16, &aom_sad16x16_neon, -1), + make_tuple(16, 8, &aom_sad16x8_neon, -1), + make_tuple(8, 16, &aom_sad8x16_neon, -1), + make_tuple(8, 8, &aom_sad8x8_neon, -1), + make_tuple(8, 4, &aom_sad8x4_neon, -1), + make_tuple(4, 8, &aom_sad4x8_neon, -1), + make_tuple(4, 4, &aom_sad4x4_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128_neon, 8), + make_tuple(128, 64, &aom_highbd_sad128x64_neon, 8), + make_tuple(64, 128, &aom_highbd_sad64x128_neon, 8), + make_tuple(64, 64, &aom_highbd_sad64x64_neon, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_neon, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_neon, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_neon, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_neon, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_neon, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_neon, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_neon, 8), + make_tuple(8, 16, &aom_highbd_sad8x16_neon, 8), + make_tuple(8, 8, &aom_highbd_sad8x8_neon, 8), + make_tuple(8, 4, &aom_highbd_sad8x4_neon, 8), + make_tuple(4, 8, &aom_highbd_sad4x8_neon, 8), + make_tuple(4, 4, &aom_highbd_sad4x4_neon, 8), + make_tuple(128, 128, &aom_highbd_sad128x128_neon, 10), + make_tuple(128, 64, 
&aom_highbd_sad128x64_neon, 10), + make_tuple(64, 128, &aom_highbd_sad64x128_neon, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_neon, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_neon, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_neon, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_neon, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_neon, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_neon, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_neon, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_neon, 10), + make_tuple(8, 16, &aom_highbd_sad8x16_neon, 10), + make_tuple(8, 8, &aom_highbd_sad8x8_neon, 10), + make_tuple(8, 4, &aom_highbd_sad8x4_neon, 10), + make_tuple(4, 8, &aom_highbd_sad4x8_neon, 10), + make_tuple(4, 4, &aom_highbd_sad4x4_neon, 10), + make_tuple(128, 128, &aom_highbd_sad128x128_neon, 12), + make_tuple(128, 64, &aom_highbd_sad128x64_neon, 12), + make_tuple(64, 128, &aom_highbd_sad64x128_neon, 12), + make_tuple(64, 64, &aom_highbd_sad64x64_neon, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_neon, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_neon, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_neon, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_neon, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_neon, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_neon, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_neon, 12), + make_tuple(8, 16, &aom_highbd_sad8x16_neon, 12), + make_tuple(8, 8, &aom_highbd_sad8x8_neon, 12), + make_tuple(8, 4, &aom_highbd_sad8x4_neon, 12), + make_tuple(4, 8, &aom_highbd_sad4x8_neon, 12), + make_tuple(4, 4, &aom_highbd_sad4x4_neon, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16_neon, -1), + make_tuple(32, 8, &aom_sad32x8_neon, -1), + make_tuple(16, 64, &aom_sad16x64_neon, -1), + make_tuple(16, 4, &aom_sad16x4_neon, -1), + make_tuple(8, 32, &aom_sad8x32_neon, -1), + make_tuple(4, 16, &aom_sad4x16_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad64x16_neon, 
8), + make_tuple(16, 64, &aom_highbd_sad16x64_neon, 8), + make_tuple(32, 8, &aom_highbd_sad32x8_neon, 8), + make_tuple(8, 32, &aom_highbd_sad8x32_neon, 8), + make_tuple(16, 4, &aom_highbd_sad16x4_neon, 8), + make_tuple(4, 16, &aom_highbd_sad4x16_neon, 8), + make_tuple(64, 16, &aom_highbd_sad64x16_neon, 10), + make_tuple(16, 64, &aom_highbd_sad16x64_neon, 10), + make_tuple(32, 8, &aom_highbd_sad32x8_neon, 10), + make_tuple(8, 32, &aom_highbd_sad8x32_neon, 10), + make_tuple(16, 4, &aom_highbd_sad16x4_neon, 10), + make_tuple(4, 16, &aom_highbd_sad4x16_neon, 10), + make_tuple(64, 16, &aom_highbd_sad64x16_neon, 12), + make_tuple(16, 64, &aom_highbd_sad16x64_neon, 12), + make_tuple(32, 8, &aom_highbd_sad32x8_neon, 12), + make_tuple(8, 32, &aom_highbd_sad8x32_neon, 12), + make_tuple(16, 4, &aom_highbd_sad16x4_neon, 12), + make_tuple(4, 16, &aom_highbd_sad4x16_neon, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests)); + +const SadMxNx4Param x4d_neon_tests[] = { + make_tuple(128, 128, &aom_sad128x128x4d_neon, -1), + make_tuple(128, 64, &aom_sad128x64x4d_neon, -1), + make_tuple(64, 128, &aom_sad64x128x4d_neon, -1), + make_tuple(64, 64, &aom_sad64x64x4d_neon, -1), + make_tuple(64, 32, &aom_sad64x32x4d_neon, -1), + make_tuple(32, 64, &aom_sad32x64x4d_neon, -1), + make_tuple(32, 32, &aom_sad32x32x4d_neon, -1), + make_tuple(32, 16, &aom_sad32x16x4d_neon, -1), + make_tuple(16, 32, &aom_sad16x32x4d_neon, -1), + make_tuple(16, 16, &aom_sad16x16x4d_neon, -1), + make_tuple(16, 8, &aom_sad16x8x4d_neon, -1), + make_tuple(8, 16, &aom_sad8x16x4d_neon, -1), + make_tuple(8, 8, &aom_sad8x8x4d_neon, -1), + make_tuple(8, 4, &aom_sad8x4x4d_neon, -1), + make_tuple(4, 8, &aom_sad4x8x4d_neon, -1), + make_tuple(4, 4, &aom_sad4x4x4d_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128x4d_neon, 8), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_neon, 8), + 
make_tuple(64, 128, &aom_highbd_sad64x128x4d_neon, 8), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_neon, 8), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_neon, 8), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_neon, 8), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_neon, 8), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_neon, 8), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_neon, 8), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_neon, 8), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_neon, 8), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_neon, 8), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_neon, 8), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_neon, 8), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_neon, 8), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_neon, 8), + make_tuple(128, 128, &aom_highbd_sad128x128x4d_neon, 10), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_neon, 10), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_neon, 10), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_neon, 10), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_neon, 10), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_neon, 10), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_neon, 10), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_neon, 10), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_neon, 10), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_neon, 10), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_neon, 10), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_neon, 10), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_neon, 10), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_neon, 10), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_neon, 10), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_neon, 10), + make_tuple(128, 128, &aom_highbd_sad128x128x4d_neon, 12), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_neon, 12), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_neon, 12), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_neon, 12), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_neon, 12), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_neon, 12), + make_tuple(32, 32, 
&aom_highbd_sad32x32x4d_neon, 12), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_neon, 12), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_neon, 12), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_neon, 12), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_neon, 12), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_neon, 12), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_neon, 12), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_neon, 12), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_neon, 12), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_neon, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16x4d_neon, -1), + make_tuple(32, 8, &aom_sad32x8x4d_neon, -1), + make_tuple(16, 64, &aom_sad16x64x4d_neon, -1), + make_tuple(16, 4, &aom_sad16x4x4d_neon, -1), + make_tuple(8, 32, &aom_sad8x32x4d_neon, -1), + make_tuple(4, 16, &aom_sad4x16x4d_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad64x16x4d_neon, 8), + make_tuple(16, 64, &aom_highbd_sad16x64x4d_neon, 8), + make_tuple(32, 8, &aom_highbd_sad32x8x4d_neon, 8), + make_tuple(8, 32, &aom_highbd_sad8x32x4d_neon, 8), + make_tuple(16, 4, &aom_highbd_sad16x4x4d_neon, 8), + make_tuple(4, 16, &aom_highbd_sad4x16x4d_neon, 8), + make_tuple(64, 16, &aom_highbd_sad64x16x4d_neon, 10), + make_tuple(16, 64, &aom_highbd_sad16x64x4d_neon, 10), + make_tuple(32, 8, &aom_highbd_sad32x8x4d_neon, 10), + make_tuple(8, 32, &aom_highbd_sad8x32x4d_neon, 10), + make_tuple(16, 4, &aom_highbd_sad16x4x4d_neon, 10), + make_tuple(4, 16, &aom_highbd_sad4x16x4d_neon, 10), + make_tuple(64, 16, &aom_highbd_sad64x16x4d_neon, 12), + make_tuple(16, 64, &aom_highbd_sad16x64x4d_neon, 12), + make_tuple(32, 8, &aom_highbd_sad32x8x4d_neon, 12), + make_tuple(8, 32, &aom_highbd_sad8x32x4d_neon, 12), + make_tuple(16, 4, &aom_highbd_sad16x4x4d_neon, 12), + make_tuple(4, 16, &aom_highbd_sad4x16x4d_neon, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(NEON, SADx4Test, 
::testing::ValuesIn(x4d_neon_tests)); +const SadSkipMxNParam skip_neon_tests[] = { + make_tuple(128, 128, &aom_sad_skip_128x128_neon, -1), + make_tuple(128, 64, &aom_sad_skip_128x64_neon, -1), + make_tuple(64, 128, &aom_sad_skip_64x128_neon, -1), + make_tuple(64, 64, &aom_sad_skip_64x64_neon, -1), + make_tuple(64, 32, &aom_sad_skip_64x32_neon, -1), + make_tuple(32, 64, &aom_sad_skip_32x64_neon, -1), + make_tuple(32, 32, &aom_sad_skip_32x32_neon, -1), + make_tuple(32, 16, &aom_sad_skip_32x16_neon, -1), + make_tuple(16, 32, &aom_sad_skip_16x32_neon, -1), + make_tuple(16, 16, &aom_sad_skip_16x16_neon, -1), + make_tuple(16, 8, &aom_sad_skip_16x8_neon, -1), + make_tuple(8, 16, &aom_sad_skip_8x16_neon, -1), + make_tuple(8, 8, &aom_sad_skip_8x8_neon, -1), + make_tuple(8, 4, &aom_sad_skip_8x4_neon, -1), + make_tuple(4, 8, &aom_sad_skip_4x8_neon, -1), + make_tuple(4, 4, &aom_sad_skip_4x4_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad_skip_128x128_neon, 8), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64_neon, 8), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128_neon, 8), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_neon, 8), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_neon, 8), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64_neon, 8), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32_neon, 8), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_neon, 8), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32_neon, 8), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_neon, 8), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_neon, 8), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16_neon, 8), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8_neon, 8), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4_neon, 8), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8_neon, 8), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4_neon, 8), + make_tuple(128, 128, &aom_highbd_sad_skip_128x128_neon, 10), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64_neon, 10), + make_tuple(64, 128, 
&aom_highbd_sad_skip_64x128_neon, 10), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_neon, 10), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_neon, 10), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64_neon, 10), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32_neon, 10), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_neon, 10), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32_neon, 10), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_neon, 10), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_neon, 10), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16_neon, 10), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8_neon, 10), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4_neon, 10), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8_neon, 10), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4_neon, 10), + make_tuple(128, 128, &aom_highbd_sad_skip_128x128_neon, 12), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64_neon, 12), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128_neon, 12), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_neon, 12), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_neon, 12), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64_neon, 12), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32_neon, 12), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_neon, 12), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32_neon, 12), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_neon, 12), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_neon, 12), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16_neon, 12), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8_neon, 12), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4_neon, 12), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8_neon, 12), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4_neon, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad_skip_64x16_neon, -1), + make_tuple(32, 8, &aom_sad_skip_32x8_neon, -1), + make_tuple(16, 64, &aom_sad_skip_16x64_neon, -1), + make_tuple(16, 4, &aom_sad_skip_16x4_neon, -1), + make_tuple(8, 
32, &aom_sad_skip_8x32_neon, -1), + make_tuple(4, 16, &aom_sad_skip_4x16_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad_skip_64x16_neon, 8), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_neon, 8), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8_neon, 8), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32_neon, 8), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4_neon, 8), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16_neon, 8), + make_tuple(64, 16, &aom_highbd_sad_skip_64x16_neon, 10), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_neon, 10), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8_neon, 10), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32_neon, 10), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4_neon, 10), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16_neon, 10), + make_tuple(64, 16, &aom_highbd_sad_skip_64x16_neon, 12), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_neon, 12), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8_neon, 12), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32_neon, 12), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4_neon, 12), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16_neon, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(NEON, SADSkipTest, + ::testing::ValuesIn(skip_neon_tests)); + +const SadSkipMxNx4Param skip_x4d_neon_tests[] = { + make_tuple(128, 128, &aom_sad_skip_128x128x4d_neon, -1), + make_tuple(128, 64, &aom_sad_skip_128x64x4d_neon, -1), + make_tuple(64, 128, &aom_sad_skip_64x128x4d_neon, -1), + make_tuple(64, 64, &aom_sad_skip_64x64x4d_neon, -1), + make_tuple(64, 32, &aom_sad_skip_64x32x4d_neon, -1), + make_tuple(32, 64, &aom_sad_skip_32x64x4d_neon, -1), + make_tuple(32, 32, &aom_sad_skip_32x32x4d_neon, -1), + make_tuple(32, 16, &aom_sad_skip_32x16x4d_neon, -1), + make_tuple(16, 32, &aom_sad_skip_16x32x4d_neon, -1), + make_tuple(16, 16, &aom_sad_skip_16x16x4d_neon, -1), + make_tuple(16, 8, &aom_sad_skip_16x8x4d_neon, -1), + make_tuple(8, 16, 
&aom_sad_skip_8x16x4d_neon, -1), + make_tuple(8, 8, &aom_sad_skip_8x8x4d_neon, -1), + make_tuple(8, 4, &aom_sad_skip_8x4x4d_neon, -1), + make_tuple(4, 8, &aom_sad_skip_4x8x4d_neon, -1), + make_tuple(4, 4, &aom_sad_skip_4x4x4d_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_neon, 8), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_neon, 8), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_neon, 8), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_neon, 8), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_neon, 8), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_neon, 8), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_neon, 8), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_neon, 8), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_neon, 8), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_neon, 8), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_neon, 8), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_neon, 8), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_neon, 8), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_neon, 8), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_neon, 8), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_neon, 8), + make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_neon, 10), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_neon, 10), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_neon, 10), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_neon, 10), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_neon, 10), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_neon, 10), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_neon, 10), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_neon, 10), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_neon, 10), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_neon, 10), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_neon, 10), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_neon, 10), + make_tuple(8, 8, 
&aom_highbd_sad_skip_8x8x4d_neon, 10), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_neon, 10), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_neon, 10), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_neon, 10), + make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_neon, 12), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_neon, 12), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_neon, 12), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_neon, 12), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_neon, 12), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_neon, 12), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_neon, 12), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_neon, 12), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_neon, 12), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_neon, 12), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_neon, 12), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_neon, 12), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_neon, 12), + make_tuple(8, 4, &aom_highbd_sad_skip_8x4x4d_neon, 12), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_neon, 12), + make_tuple(4, 4, &aom_highbd_sad_skip_4x4x4d_neon, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad_skip_64x16x4d_neon, -1), + make_tuple(32, 8, &aom_sad_skip_32x8x4d_neon, -1), + make_tuple(16, 64, &aom_sad_skip_16x64x4d_neon, -1), + make_tuple(16, 4, &aom_sad_skip_16x4x4d_neon, -1), + make_tuple(8, 32, &aom_sad_skip_8x32x4d_neon, -1), + make_tuple(4, 16, &aom_sad_skip_4x16x4d_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_neon, 8), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_neon, 8), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_neon, 8), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_neon, 8), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_neon, 8), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_neon, 8), + make_tuple(64, 16, 
&aom_highbd_sad_skip_64x16x4d_neon, 10), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_neon, 10), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_neon, 10), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_neon, 10), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_neon, 10), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_neon, 10), + make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_neon, 12), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_neon, 12), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_neon, 12), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_neon, 12), + make_tuple(16, 4, &aom_highbd_sad_skip_16x4x4d_neon, 12), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_neon, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(NEON, SADSkipx4Test, + ::testing::ValuesIn(skip_x4d_neon_tests)); + +const SadMxNAvgParam avg_neon_tests[] = { + make_tuple(128, 128, &aom_sad128x128_avg_neon, -1), + make_tuple(128, 64, &aom_sad128x64_avg_neon, -1), + make_tuple(64, 128, &aom_sad64x128_avg_neon, -1), + make_tuple(64, 64, &aom_sad64x64_avg_neon, -1), + make_tuple(64, 32, &aom_sad64x32_avg_neon, -1), + make_tuple(32, 64, &aom_sad32x64_avg_neon, -1), + make_tuple(32, 32, &aom_sad32x32_avg_neon, -1), + make_tuple(32, 16, &aom_sad32x16_avg_neon, -1), + make_tuple(16, 32, &aom_sad16x32_avg_neon, -1), + make_tuple(16, 16, &aom_sad16x16_avg_neon, -1), + make_tuple(16, 8, &aom_sad16x8_avg_neon, -1), + make_tuple(8, 16, &aom_sad8x16_avg_neon, -1), + make_tuple(8, 8, &aom_sad8x8_avg_neon, -1), + make_tuple(8, 4, &aom_sad8x4_avg_neon, -1), + make_tuple(4, 8, &aom_sad4x8_avg_neon, -1), + make_tuple(4, 4, &aom_sad4x4_avg_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128_avg_neon, 8), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_neon, 8), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_neon, 8), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_neon, 8), + make_tuple(64, 32, 
&aom_highbd_sad64x32_avg_neon, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_neon, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_neon, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_neon, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_neon, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_neon, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_neon, 8), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_neon, 8), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_neon, 8), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_neon, 8), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_neon, 8), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_neon, 8), + make_tuple(128, 128, &aom_highbd_sad128x128_avg_neon, 10), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_neon, 10), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_neon, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_neon, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_neon, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_neon, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_neon, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_neon, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_neon, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_neon, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_neon, 10), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_neon, 10), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_neon, 10), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_neon, 10), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_neon, 10), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_neon, 10), + make_tuple(128, 128, &aom_highbd_sad128x128_avg_neon, 12), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_neon, 12), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_neon, 12), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_neon, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_neon, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_neon, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_neon, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_neon, 12), + 
make_tuple(16, 32, &aom_highbd_sad16x32_avg_neon, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_neon, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_neon, 12), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_neon, 12), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_neon, 12), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_neon, 12), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_neon, 12), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_neon, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16_avg_neon, -1), + make_tuple(32, 8, &aom_sad32x8_avg_neon, -1), + make_tuple(16, 64, &aom_sad16x64_avg_neon, -1), + make_tuple(16, 4, &aom_sad16x4_avg_neon, -1), + make_tuple(8, 32, &aom_sad8x32_avg_neon, -1), + make_tuple(4, 16, &aom_sad4x16_avg_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad64x16_avg_neon, 8), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_neon, 8), + make_tuple(32, 8, &aom_highbd_sad32x8_avg_neon, 8), + make_tuple(8, 32, &aom_highbd_sad8x32_avg_neon, 8), + make_tuple(16, 4, &aom_highbd_sad16x4_avg_neon, 8), + make_tuple(4, 16, &aom_highbd_sad4x16_avg_neon, 8), + make_tuple(64, 16, &aom_highbd_sad64x16_avg_neon, 10), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_neon, 10), + make_tuple(32, 8, &aom_highbd_sad32x8_avg_neon, 10), + make_tuple(8, 32, &aom_highbd_sad8x32_avg_neon, 10), + make_tuple(16, 4, &aom_highbd_sad16x4_avg_neon, 10), + make_tuple(4, 16, &aom_highbd_sad4x16_avg_neon, 10), + make_tuple(64, 16, &aom_highbd_sad64x16_avg_neon, 12), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_neon, 12), + make_tuple(32, 8, &aom_highbd_sad32x8_avg_neon, 12), + make_tuple(8, 32, &aom_highbd_sad8x32_avg_neon, 12), + make_tuple(16, 4, &aom_highbd_sad16x4_avg_neon, 12), + make_tuple(4, 16, &aom_highbd_sad4x16_avg_neon, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(NEON, SADavgTest, ::testing::ValuesIn(avg_neon_tests)); + +const DistWtdSadMxNAvgParam 
dist_wtd_avg_neon_tests[] = { + make_tuple(128, 128, &aom_dist_wtd_sad128x128_avg_neon, -1), + make_tuple(128, 64, &aom_dist_wtd_sad128x64_avg_neon, -1), + make_tuple(64, 128, &aom_dist_wtd_sad64x128_avg_neon, -1), + make_tuple(64, 64, &aom_dist_wtd_sad64x64_avg_neon, -1), + make_tuple(64, 32, &aom_dist_wtd_sad64x32_avg_neon, -1), + make_tuple(32, 64, &aom_dist_wtd_sad32x64_avg_neon, -1), + make_tuple(32, 32, &aom_dist_wtd_sad32x32_avg_neon, -1), + make_tuple(32, 16, &aom_dist_wtd_sad32x16_avg_neon, -1), + make_tuple(16, 32, &aom_dist_wtd_sad16x32_avg_neon, -1), + make_tuple(16, 16, &aom_dist_wtd_sad16x16_avg_neon, -1), + make_tuple(16, 8, &aom_dist_wtd_sad16x8_avg_neon, -1), + make_tuple(8, 16, &aom_dist_wtd_sad8x16_avg_neon, -1), + make_tuple(8, 8, &aom_dist_wtd_sad8x8_avg_neon, -1), + make_tuple(8, 4, &aom_dist_wtd_sad8x4_avg_neon, -1), + make_tuple(4, 8, &aom_dist_wtd_sad4x8_avg_neon, -1), + make_tuple(4, 4, &aom_dist_wtd_sad4x4_avg_neon, -1), + +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_dist_wtd_sad64x16_avg_neon, -1), + make_tuple(16, 64, &aom_dist_wtd_sad16x64_avg_neon, -1), + make_tuple(32, 8, &aom_dist_wtd_sad32x8_avg_neon, -1), + make_tuple(8, 32, &aom_dist_wtd_sad8x32_avg_neon, -1), + make_tuple(16, 4, &aom_dist_wtd_sad16x4_avg_neon, -1), + make_tuple(4, 16, &aom_dist_wtd_sad4x16_avg_neon, -1), +#endif // !CONFIG_REALTIME_ONLY +}; + +INSTANTIATE_TEST_SUITE_P(NEON, DistWtdSADavgTest, + ::testing::ValuesIn(dist_wtd_avg_neon_tests)); + +const SadMxNx4Param x3d_neon_tests[] = { + make_tuple(128, 128, &aom_sad128x128x3d_neon, -1), + make_tuple(128, 64, &aom_sad128x64x3d_neon, -1), + make_tuple(64, 128, &aom_sad64x128x3d_neon, -1), + make_tuple(64, 64, &aom_sad64x64x3d_neon, -1), + make_tuple(64, 32, &aom_sad64x32x3d_neon, -1), + make_tuple(32, 64, &aom_sad32x64x3d_neon, -1), + make_tuple(32, 32, &aom_sad32x32x3d_neon, -1), + make_tuple(32, 16, &aom_sad32x16x3d_neon, -1), + make_tuple(16, 32, &aom_sad16x32x3d_neon, -1), + make_tuple(16, 16, 
&aom_sad16x16x3d_neon, -1), + make_tuple(16, 8, &aom_sad16x8x3d_neon, -1), + make_tuple(8, 16, &aom_sad8x16x3d_neon, -1), + make_tuple(8, 8, &aom_sad8x8x3d_neon, -1), + make_tuple(8, 4, &aom_sad8x4x3d_neon, -1), + make_tuple(4, 8, &aom_sad4x8x3d_neon, -1), + make_tuple(4, 4, &aom_sad4x4x3d_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128x3d_neon, 8), + make_tuple(128, 64, &aom_highbd_sad128x64x3d_neon, 8), + make_tuple(64, 128, &aom_highbd_sad64x128x3d_neon, 8), + make_tuple(64, 64, &aom_highbd_sad64x64x3d_neon, 8), + make_tuple(64, 32, &aom_highbd_sad64x32x3d_neon, 8), + make_tuple(32, 64, &aom_highbd_sad32x64x3d_neon, 8), + make_tuple(32, 32, &aom_highbd_sad32x32x3d_neon, 8), + make_tuple(32, 16, &aom_highbd_sad32x16x3d_neon, 8), + make_tuple(16, 32, &aom_highbd_sad16x32x3d_neon, 8), + make_tuple(16, 16, &aom_highbd_sad16x16x3d_neon, 8), + make_tuple(16, 8, &aom_highbd_sad16x8x3d_neon, 8), + make_tuple(8, 16, &aom_highbd_sad8x16x3d_neon, 8), + make_tuple(8, 8, &aom_highbd_sad8x8x3d_neon, 8), + make_tuple(8, 4, &aom_highbd_sad8x4x3d_neon, 8), + make_tuple(4, 8, &aom_highbd_sad4x8x3d_neon, 8), + make_tuple(4, 4, &aom_highbd_sad4x4x3d_neon, 8), + make_tuple(128, 128, &aom_highbd_sad128x128x3d_neon, 10), + make_tuple(128, 64, &aom_highbd_sad128x64x3d_neon, 10), + make_tuple(64, 128, &aom_highbd_sad64x128x3d_neon, 10), + make_tuple(64, 64, &aom_highbd_sad64x64x3d_neon, 10), + make_tuple(64, 32, &aom_highbd_sad64x32x3d_neon, 10), + make_tuple(32, 64, &aom_highbd_sad32x64x3d_neon, 10), + make_tuple(32, 32, &aom_highbd_sad32x32x3d_neon, 10), + make_tuple(32, 16, &aom_highbd_sad32x16x3d_neon, 10), + make_tuple(16, 32, &aom_highbd_sad16x32x3d_neon, 10), + make_tuple(16, 16, &aom_highbd_sad16x16x3d_neon, 10), + make_tuple(16, 8, &aom_highbd_sad16x8x3d_neon, 10), + make_tuple(8, 16, &aom_highbd_sad8x16x3d_neon, 10), + make_tuple(8, 8, &aom_highbd_sad8x8x3d_neon, 10), + make_tuple(8, 4, &aom_highbd_sad8x4x3d_neon, 10), + make_tuple(4, 8, 
&aom_highbd_sad4x8x3d_neon, 10), + make_tuple(4, 4, &aom_highbd_sad4x4x3d_neon, 10), + make_tuple(128, 128, &aom_highbd_sad128x128x3d_neon, 12), + make_tuple(128, 64, &aom_highbd_sad128x64x3d_neon, 12), + make_tuple(64, 128, &aom_highbd_sad64x128x3d_neon, 12), + make_tuple(64, 64, &aom_highbd_sad64x64x3d_neon, 12), + make_tuple(64, 32, &aom_highbd_sad64x32x3d_neon, 12), + make_tuple(32, 64, &aom_highbd_sad32x64x3d_neon, 12), + make_tuple(32, 32, &aom_highbd_sad32x32x3d_neon, 12), + make_tuple(32, 16, &aom_highbd_sad32x16x3d_neon, 12), + make_tuple(16, 32, &aom_highbd_sad16x32x3d_neon, 12), + make_tuple(16, 16, &aom_highbd_sad16x16x3d_neon, 12), + make_tuple(16, 8, &aom_highbd_sad16x8x3d_neon, 12), + make_tuple(8, 16, &aom_highbd_sad8x16x3d_neon, 12), + make_tuple(8, 8, &aom_highbd_sad8x8x3d_neon, 12), + make_tuple(8, 4, &aom_highbd_sad8x4x3d_neon, 12), + make_tuple(4, 8, &aom_highbd_sad4x8x3d_neon, 12), + make_tuple(4, 4, &aom_highbd_sad4x4x3d_neon, 12), +#endif // CONFIG_AV1_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16x3d_neon, -1), + make_tuple(32, 8, &aom_sad32x8x3d_neon, -1), + make_tuple(16, 64, &aom_sad16x64x3d_neon, -1), + make_tuple(16, 4, &aom_sad16x4x3d_neon, -1), + make_tuple(8, 32, &aom_sad8x32x3d_neon, -1), + make_tuple(4, 16, &aom_sad4x16x3d_neon, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad64x16x3d_neon, 8), + make_tuple(16, 64, &aom_highbd_sad16x64x3d_neon, 8), + make_tuple(32, 8, &aom_highbd_sad32x8x3d_neon, 8), + make_tuple(8, 32, &aom_highbd_sad8x32x3d_neon, 8), + make_tuple(16, 4, &aom_highbd_sad16x4x3d_neon, 8), + make_tuple(4, 16, &aom_highbd_sad4x16x3d_neon, 8), + make_tuple(64, 16, &aom_highbd_sad64x16x3d_neon, 10), + make_tuple(16, 64, &aom_highbd_sad16x64x3d_neon, 10), + make_tuple(32, 8, &aom_highbd_sad32x8x3d_neon, 10), + make_tuple(8, 32, &aom_highbd_sad8x32x3d_neon, 10), + make_tuple(16, 4, &aom_highbd_sad16x4x3d_neon, 10), + make_tuple(4, 16, &aom_highbd_sad4x16x3d_neon, 10), + 
make_tuple(64, 16, &aom_highbd_sad64x16x3d_neon, 12),
+  make_tuple(16, 64, &aom_highbd_sad16x64x3d_neon, 12),
+  make_tuple(32, 8, &aom_highbd_sad32x8x3d_neon, 12),
+  make_tuple(8, 32, &aom_highbd_sad8x32x3d_neon, 12),
+  make_tuple(16, 4, &aom_highbd_sad16x4x3d_neon, 12),
+  make_tuple(4, 16, &aom_highbd_sad4x16x3d_neon, 12),
+#endif  // CONFIG_AV1_HIGHBITDEPTH
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON, SADx3Test, ::testing::ValuesIn(x3d_neon_tests));
+
+#endif  // HAVE_NEON
+
+#if HAVE_NEON_DOTPROD
+const SadMxNParam neon_dotprod_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128_neon_dotprod, -1),
+  make_tuple(128, 64, &aom_sad128x64_neon_dotprod, -1),
+  make_tuple(64, 128, &aom_sad64x128_neon_dotprod, -1),
+  make_tuple(64, 64, &aom_sad64x64_neon_dotprod, -1),
+  make_tuple(64, 32, &aom_sad64x32_neon_dotprod, -1),
+  make_tuple(32, 64, &aom_sad32x64_neon_dotprod, -1),
+  make_tuple(32, 32, &aom_sad32x32_neon_dotprod, -1),
+  make_tuple(32, 16, &aom_sad32x16_neon_dotprod, -1),
+  make_tuple(16, 32, &aom_sad16x32_neon_dotprod, -1),
+  make_tuple(16, 16, &aom_sad16x16_neon_dotprod, -1),
+  make_tuple(16, 8, &aom_sad16x8_neon_dotprod, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16_neon_dotprod, -1),
+  make_tuple(32, 8, &aom_sad32x8_neon_dotprod, -1),
+  make_tuple(16, 64, &aom_sad16x64_neon_dotprod, -1),
+  make_tuple(16, 4, &aom_sad16x4_neon_dotprod, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADTest,
+                         ::testing::ValuesIn(neon_dotprod_tests));
+
+// Parameter table for the NEON_DOTPROD instantiation of SADSkipTest.
+// Declared as SadSkipMxNParam so that it matches skip_neon_tests, the other
+// table passed to SADSkipTest in this file.
+// NOTE(review): each tuple looks like (width, height, kernel, bit depth), with
+// -1 used for the non-highbd kernels -- confirm against the param typedef.
+// Only block sizes with a width of at least 16 are listed here.
+const SadSkipMxNParam skip_neon_dotprod_tests[] = {
+  make_tuple(128, 128, &aom_sad_skip_128x128_neon_dotprod, -1),
+  make_tuple(128, 64, &aom_sad_skip_128x64_neon_dotprod, -1),
+  make_tuple(64, 128, &aom_sad_skip_64x128_neon_dotprod, -1),
+  make_tuple(64, 64, &aom_sad_skip_64x64_neon_dotprod, -1),
+  make_tuple(64, 32, &aom_sad_skip_64x32_neon_dotprod, -1),
+  make_tuple(32, 64, &aom_sad_skip_32x64_neon_dotprod, -1),
+  make_tuple(32, 32, &aom_sad_skip_32x32_neon_dotprod, -1),
+  make_tuple(32, 16, &aom_sad_skip_32x16_neon_dotprod, -1),
+  make_tuple(16, 32, &aom_sad_skip_16x32_neon_dotprod, -1),
+  make_tuple(16, 16, &aom_sad_skip_16x16_neon_dotprod, -1),
+  make_tuple(16, 8, &aom_sad_skip_16x8_neon_dotprod, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad_skip_64x16_neon_dotprod, -1),
+  make_tuple(32, 8, &aom_sad_skip_32x8_neon_dotprod, -1),
+  make_tuple(16, 64, &aom_sad_skip_16x64_neon_dotprod, -1),
+  make_tuple(16, 4, &aom_sad_skip_16x4_neon_dotprod, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADSkipTest,
+                         ::testing::ValuesIn(skip_neon_dotprod_tests));
+
+const SadMxNAvgParam avg_neon_dotprod_tests[] = {
+  make_tuple(128, 128, &aom_sad128x128_avg_neon_dotprod, -1),
+  make_tuple(128, 64, &aom_sad128x64_avg_neon_dotprod, -1),
+  make_tuple(64, 128, &aom_sad64x128_avg_neon_dotprod, -1),
+  make_tuple(64, 64, &aom_sad64x64_avg_neon_dotprod, -1),
+  make_tuple(64, 32, &aom_sad64x32_avg_neon_dotprod, -1),
+  make_tuple(32, 64, &aom_sad32x64_avg_neon_dotprod, -1),
+  make_tuple(32, 32, &aom_sad32x32_avg_neon_dotprod, -1),
+  make_tuple(32, 16, &aom_sad32x16_avg_neon_dotprod, -1),
+  make_tuple(16, 32, &aom_sad16x32_avg_neon_dotprod, -1),
+  make_tuple(16, 16, &aom_sad16x16_avg_neon_dotprod, -1),
+  make_tuple(16, 8, &aom_sad16x8_avg_neon_dotprod, -1),
+#if !CONFIG_REALTIME_ONLY
+  make_tuple(64, 16, &aom_sad64x16_avg_neon_dotprod, -1),
+  make_tuple(32, 8, &aom_sad32x8_avg_neon_dotprod, -1),
+  make_tuple(16, 64, &aom_sad16x64_avg_neon_dotprod, -1),
+  make_tuple(16, 4, &aom_sad16x4_avg_neon_dotprod, -1),
+#endif  // !CONFIG_REALTIME_ONLY
+};
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADavgTest,
+                         ::testing::ValuesIn(avg_neon_dotprod_tests));
+
+const DistWtdSadMxNAvgParam dist_wtd_avg_neon_dotprod_tests[] = {
+  make_tuple(128, 128, &aom_dist_wtd_sad128x128_avg_neon_dotprod, -1),
+  make_tuple(128, 64, &aom_dist_wtd_sad128x64_avg_neon_dotprod, -1),
+  make_tuple(64, 128,
&aom_dist_wtd_sad64x128_avg_neon_dotprod, -1), + make_tuple(64, 64, &aom_dist_wtd_sad64x64_avg_neon_dotprod, -1), + make_tuple(64, 32, &aom_dist_wtd_sad64x32_avg_neon_dotprod, -1), + make_tuple(32, 64, &aom_dist_wtd_sad32x64_avg_neon_dotprod, -1), + make_tuple(32, 32, &aom_dist_wtd_sad32x32_avg_neon_dotprod, -1), + make_tuple(32, 16, &aom_dist_wtd_sad32x16_avg_neon_dotprod, -1), + make_tuple(16, 32, &aom_dist_wtd_sad16x32_avg_neon_dotprod, -1), + make_tuple(16, 16, &aom_dist_wtd_sad16x16_avg_neon_dotprod, -1), + make_tuple(16, 8, &aom_dist_wtd_sad16x8_avg_neon_dotprod, -1), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_dist_wtd_sad64x16_avg_neon_dotprod, -1), + make_tuple(16, 64, &aom_dist_wtd_sad16x64_avg_neon_dotprod, -1), + make_tuple(32, 8, &aom_dist_wtd_sad32x8_avg_neon_dotprod, -1), + make_tuple(16, 4, &aom_dist_wtd_sad16x4_avg_neon_dotprod, -1), +#endif // !CONFIG_REALTIME_ONLY +}; + +INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, DistWtdSADavgTest, + ::testing::ValuesIn(dist_wtd_avg_neon_dotprod_tests)); + +const SadMxNx4Param x3d_neon_dotprod_tests[] = { + make_tuple(128, 128, &aom_sad128x128x3d_neon_dotprod, -1), + make_tuple(128, 64, &aom_sad128x64x3d_neon_dotprod, -1), + make_tuple(64, 128, &aom_sad64x128x3d_neon_dotprod, -1), + make_tuple(64, 64, &aom_sad64x64x3d_neon_dotprod, -1), + make_tuple(64, 32, &aom_sad64x32x3d_neon_dotprod, -1), + make_tuple(32, 64, &aom_sad32x64x3d_neon_dotprod, -1), + make_tuple(32, 32, &aom_sad32x32x3d_neon_dotprod, -1), + make_tuple(32, 16, &aom_sad32x16x3d_neon_dotprod, -1), + make_tuple(16, 32, &aom_sad16x32x3d_neon_dotprod, -1), + make_tuple(16, 16, &aom_sad16x16x3d_neon_dotprod, -1), + make_tuple(16, 8, &aom_sad16x8x3d_neon_dotprod, -1), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16x3d_neon_dotprod, -1), + make_tuple(32, 8, &aom_sad32x8x3d_neon_dotprod, -1), + make_tuple(16, 64, &aom_sad16x64x3d_neon_dotprod, -1), + make_tuple(16, 4, &aom_sad16x4x3d_neon_dotprod, -1), +#endif // !CONFIG_REALTIME_ONLY +}; 
+INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADx3Test, + ::testing::ValuesIn(x3d_neon_dotprod_tests)); + +const SadMxNx4Param x4d_neon_dotprod_tests[] = { + make_tuple(128, 128, &aom_sad128x128x4d_neon_dotprod, -1), + make_tuple(128, 64, &aom_sad128x64x4d_neon_dotprod, -1), + make_tuple(64, 128, &aom_sad64x128x4d_neon_dotprod, -1), + make_tuple(64, 64, &aom_sad64x64x4d_neon_dotprod, -1), + make_tuple(64, 32, &aom_sad64x32x4d_neon_dotprod, -1), + make_tuple(32, 64, &aom_sad32x64x4d_neon_dotprod, -1), + make_tuple(32, 32, &aom_sad32x32x4d_neon_dotprod, -1), + make_tuple(32, 16, &aom_sad32x16x4d_neon_dotprod, -1), + make_tuple(16, 32, &aom_sad16x32x4d_neon_dotprod, -1), + make_tuple(16, 16, &aom_sad16x16x4d_neon_dotprod, -1), + make_tuple(16, 8, &aom_sad16x8x4d_neon_dotprod, -1), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16x4d_neon_dotprod, -1), + make_tuple(32, 8, &aom_sad32x8x4d_neon_dotprod, -1), + make_tuple(16, 64, &aom_sad16x64x4d_neon_dotprod, -1), + make_tuple(16, 4, &aom_sad16x4x4d_neon_dotprod, -1), +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADx4Test, + ::testing::ValuesIn(x4d_neon_dotprod_tests)); + +const SadSkipMxNx4Param skip_x4d_neon_dotprod_tests[] = { + make_tuple(128, 128, &aom_sad_skip_128x128x4d_neon_dotprod, -1), + make_tuple(128, 64, &aom_sad_skip_128x64x4d_neon_dotprod, -1), + make_tuple(64, 128, &aom_sad_skip_64x128x4d_neon_dotprod, -1), + make_tuple(64, 64, &aom_sad_skip_64x64x4d_neon_dotprod, -1), + make_tuple(64, 32, &aom_sad_skip_64x32x4d_neon_dotprod, -1), + make_tuple(32, 64, &aom_sad_skip_32x64x4d_neon_dotprod, -1), + make_tuple(32, 32, &aom_sad_skip_32x32x4d_neon_dotprod, -1), + make_tuple(32, 16, &aom_sad_skip_32x16x4d_neon_dotprod, -1), + make_tuple(16, 32, &aom_sad_skip_16x32x4d_neon_dotprod, -1), + make_tuple(16, 16, &aom_sad_skip_16x16x4d_neon_dotprod, -1), + make_tuple(16, 8, &aom_sad_skip_16x8x4d_neon_dotprod, -1), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, 
&aom_sad_skip_64x16x4d_neon_dotprod, -1), + make_tuple(32, 8, &aom_sad_skip_32x8x4d_neon_dotprod, -1), + make_tuple(16, 64, &aom_sad_skip_16x64x4d_neon_dotprod, -1), + make_tuple(16, 4, &aom_sad_skip_16x4x4d_neon_dotprod, -1), +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SADSkipx4Test, + ::testing::ValuesIn(skip_x4d_neon_dotprod_tests)); +#endif // HAVE_NEON_DOTPROD + +//------------------------------------------------------------------------------ +// x86 functions +#if HAVE_SSE2 +const SadMxNParam sse2_tests[] = { + make_tuple(128, 128, &aom_sad128x128_sse2, -1), + make_tuple(128, 64, &aom_sad128x64_sse2, -1), + make_tuple(64, 128, &aom_sad64x128_sse2, -1), + make_tuple(64, 64, &aom_sad64x64_sse2, -1), + make_tuple(64, 32, &aom_sad64x32_sse2, -1), + make_tuple(32, 64, &aom_sad32x64_sse2, -1), + make_tuple(32, 32, &aom_sad32x32_sse2, -1), + make_tuple(32, 16, &aom_sad32x16_sse2, -1), + make_tuple(16, 32, &aom_sad16x32_sse2, -1), + make_tuple(16, 16, &aom_sad16x16_sse2, -1), + make_tuple(16, 8, &aom_sad16x8_sse2, -1), + make_tuple(8, 16, &aom_sad8x16_sse2, -1), + make_tuple(8, 8, &aom_sad8x8_sse2, -1), + make_tuple(8, 4, &aom_sad8x4_sse2, -1), + make_tuple(4, 8, &aom_sad4x8_sse2, -1), + make_tuple(4, 4, &aom_sad4x4_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 8), + make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 8), + make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 8), + make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 8), + make_tuple(4, 8, &aom_highbd_sad4x8_sse2, 8), + make_tuple(4, 4, &aom_highbd_sad4x4_sse2, 8), + make_tuple(64, 64, 
&aom_highbd_sad64x64_sse2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 10), + make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 10), + make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 10), + make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 10), + make_tuple(4, 8, &aom_highbd_sad4x8_sse2, 10), + make_tuple(4, 4, &aom_highbd_sad4x4_sse2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 12), + make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 12), + make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 12), + make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 12), + make_tuple(4, 8, &aom_highbd_sad4x8_sse2, 12), + make_tuple(4, 4, &aom_highbd_sad4x4_sse2, 12), +#endif +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16_sse2, -1), + make_tuple(16, 64, &aom_sad16x64_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad64x16_sse2, 8), + make_tuple(16, 64, &aom_highbd_sad16x64_sse2, 8), + make_tuple(64, 16, &aom_highbd_sad64x16_sse2, 10), + make_tuple(16, 64, &aom_highbd_sad16x64_sse2, 10), + make_tuple(64, 16, &aom_highbd_sad64x16_sse2, 12), + make_tuple(16, 64, &aom_highbd_sad16x64_sse2, 12), +#endif + make_tuple(32, 8, &aom_sad32x8_sse2, -1), + make_tuple(8, 32, &aom_sad8x32_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(32, 8, &aom_highbd_sad32x8_sse2, 8), + make_tuple(8, 32, 
&aom_highbd_sad8x32_sse2, 8), + make_tuple(32, 8, &aom_highbd_sad32x8_sse2, 10), + make_tuple(8, 32, &aom_highbd_sad8x32_sse2, 10), + make_tuple(32, 8, &aom_highbd_sad32x8_sse2, 12), + make_tuple(8, 32, &aom_highbd_sad8x32_sse2, 12), +#endif + make_tuple(16, 4, &aom_sad16x4_sse2, -1), + make_tuple(4, 16, &aom_sad4x16_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 8), + make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 8), + make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 10), + make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 10), + make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 12), + make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 12), +#endif +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests)); + +const SadSkipMxNParam skip_sse2_tests[] = { + make_tuple(128, 128, &aom_sad_skip_128x128_sse2, -1), + make_tuple(128, 64, &aom_sad_skip_128x64_sse2, -1), + make_tuple(64, 128, &aom_sad_skip_64x128_sse2, -1), + make_tuple(64, 64, &aom_sad_skip_64x64_sse2, -1), + make_tuple(64, 32, &aom_sad_skip_64x32_sse2, -1), + make_tuple(32, 64, &aom_sad_skip_32x64_sse2, -1), + make_tuple(32, 32, &aom_sad_skip_32x32_sse2, -1), + make_tuple(32, 16, &aom_sad_skip_32x16_sse2, -1), + make_tuple(16, 32, &aom_sad_skip_16x32_sse2, -1), + make_tuple(16, 16, &aom_sad_skip_16x16_sse2, -1), + make_tuple(16, 8, &aom_sad_skip_16x8_sse2, -1), + make_tuple(8, 16, &aom_sad_skip_8x16_sse2, -1), + make_tuple(8, 8, &aom_sad_skip_8x8_sse2, -1), + make_tuple(4, 8, &aom_sad_skip_4x8_sse2, -1), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad_skip_64x16_sse2, -1), + make_tuple(16, 64, &aom_sad_skip_16x64_sse2, -1), + make_tuple(32, 8, &aom_sad_skip_32x8_sse2, -1), + make_tuple(8, 32, &aom_sad_skip_8x32_sse2, -1), + make_tuple(4, 16, &aom_sad_skip_4x16_sse2, -1), +#endif +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_sse2, 8), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_sse2, 8), + 
make_tuple(32, 64, &aom_highbd_sad_skip_32x64_sse2, 8), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 8), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 8), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 8), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 8), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 8), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 8), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 8), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 8), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16_sse2, 8), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_sse2, 8), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8_sse2, 8), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32_sse2, 8), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16_sse2, 8), +#endif + + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_sse2, 10), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_sse2, 10), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64_sse2, 10), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 10), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 10), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 10), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 10), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 10), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 10), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 10), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 10), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16_sse2, 10), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_sse2, 10), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8_sse2, 10), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32_sse2, 10), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16_sse2, 10), +#endif + + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_sse2, 12), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_sse2, 12), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64_sse2, 12), + make_tuple(32, 32, 
&aom_highbd_sad_skip_32x32_sse2, 12), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 12), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 12), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 12), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 12), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 12), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 12), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 12), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16_sse2, 12), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_sse2, 12), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8_sse2, 12), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32_sse2, 12), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16_sse2, 12), +#endif +#endif // CONFIG_AV1_HIGHBITDEPTH +}; +INSTANTIATE_TEST_SUITE_P(SSE2, SADSkipTest, + ::testing::ValuesIn(skip_sse2_tests)); + +const SadMxNAvgParam avg_sse2_tests[] = { + make_tuple(128, 128, &aom_sad128x128_avg_sse2, -1), + make_tuple(128, 64, &aom_sad128x64_avg_sse2, -1), + make_tuple(64, 128, &aom_sad64x128_avg_sse2, -1), + make_tuple(64, 64, &aom_sad64x64_avg_sse2, -1), + make_tuple(64, 32, &aom_sad64x32_avg_sse2, -1), + make_tuple(32, 64, &aom_sad32x64_avg_sse2, -1), + make_tuple(32, 32, &aom_sad32x32_avg_sse2, -1), + make_tuple(32, 16, &aom_sad32x16_avg_sse2, -1), + make_tuple(16, 32, &aom_sad16x32_avg_sse2, -1), + make_tuple(16, 16, &aom_sad16x16_avg_sse2, -1), + make_tuple(16, 8, &aom_sad16x8_avg_sse2, -1), + make_tuple(8, 16, &aom_sad8x16_avg_sse2, -1), + make_tuple(8, 8, &aom_sad8x8_avg_sse2, -1), + make_tuple(8, 4, &aom_sad8x4_avg_sse2, -1), + make_tuple(4, 8, &aom_sad4x8_avg_sse2, -1), + make_tuple(4, 4, &aom_sad4x4_avg_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 8), + make_tuple(32, 16, 
&aom_highbd_sad32x16_avg_sse2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 8), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 8), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 8), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 8), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_sse2, 8), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_sse2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 10), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 10), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 10), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 10), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_sse2, 10), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_sse2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 12), + make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 12), + make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 12), + make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 12), + make_tuple(4, 8, &aom_highbd_sad4x8_avg_sse2, 12), + make_tuple(4, 4, &aom_highbd_sad4x4_avg_sse2, 12), +#endif +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16_avg_sse2, -1), + 
make_tuple(16, 64, &aom_sad16x64_avg_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad64x16_avg_sse2, 8), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_sse2, 8), + make_tuple(64, 16, &aom_highbd_sad64x16_avg_sse2, 10), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_sse2, 10), + make_tuple(64, 16, &aom_highbd_sad64x16_avg_sse2, 12), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_sse2, 12), +#endif + make_tuple(32, 8, &aom_sad32x8_avg_sse2, -1), + make_tuple(8, 32, &aom_sad8x32_avg_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(32, 8, &aom_highbd_sad32x8_avg_sse2, 8), + make_tuple(8, 32, &aom_highbd_sad8x32_avg_sse2, 8), + make_tuple(32, 8, &aom_highbd_sad32x8_avg_sse2, 10), + make_tuple(8, 32, &aom_highbd_sad8x32_avg_sse2, 10), + make_tuple(32, 8, &aom_highbd_sad32x8_avg_sse2, 12), + make_tuple(8, 32, &aom_highbd_sad8x32_avg_sse2, 12), +#endif + make_tuple(16, 4, &aom_sad16x4_avg_sse2, -1), + make_tuple(4, 16, &aom_sad4x16_avg_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(16, 4, &aom_highbd_sad16x4_avg_sse2, 8), + make_tuple(4, 16, &aom_highbd_sad4x16_avg_sse2, 8), + make_tuple(16, 4, &aom_highbd_sad16x4_avg_sse2, 10), + make_tuple(4, 16, &aom_highbd_sad4x16_avg_sse2, 10), + make_tuple(16, 4, &aom_highbd_sad16x4_avg_sse2, 12), + make_tuple(4, 16, &aom_highbd_sad4x16_avg_sse2, 12), +#endif +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests)); + +const SadMxNx4Param x4d_sse2_tests[] = { + make_tuple(128, 128, &aom_sad128x128x4d_sse2, -1), + make_tuple(128, 64, &aom_sad128x64x4d_sse2, -1), + make_tuple(64, 128, &aom_sad64x128x4d_sse2, -1), + make_tuple(64, 64, &aom_sad64x64x4d_sse2, -1), + make_tuple(64, 32, &aom_sad64x32x4d_sse2, -1), + make_tuple(32, 64, &aom_sad32x64x4d_sse2, -1), + make_tuple(32, 32, &aom_sad32x32x4d_sse2, -1), + make_tuple(32, 16, &aom_sad32x16x4d_sse2, -1), + make_tuple(16, 32, &aom_sad16x32x4d_sse2, -1), + make_tuple(16, 16, 
&aom_sad16x16x4d_sse2, -1), + make_tuple(16, 8, &aom_sad16x8x4d_sse2, -1), + make_tuple(8, 16, &aom_sad8x16x4d_sse2, -1), + make_tuple(8, 8, &aom_sad8x8x4d_sse2, -1), + make_tuple(8, 4, &aom_sad8x4x4d_sse2, -1), + make_tuple(4, 8, &aom_sad4x8x4d_sse2, -1), + make_tuple(4, 4, &aom_sad4x4x4d_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 8), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 8), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 8), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 8), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 8), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 10), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 10), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 10), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 10), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 10), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 12), + make_tuple(32, 16, 
&aom_highbd_sad32x16x4d_sse2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 12), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 12), + make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 12), + make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 12), + make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 12), + make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 12), + make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 12), +#endif +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad64x16x4d_sse2, -1), + make_tuple(16, 64, &aom_sad16x64x4d_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 16, &aom_highbd_sad64x16x4d_sse2, 8), + make_tuple(16, 64, &aom_highbd_sad16x64x4d_sse2, 8), + make_tuple(64, 16, &aom_highbd_sad64x16x4d_sse2, 10), + make_tuple(16, 64, &aom_highbd_sad16x64x4d_sse2, 10), + make_tuple(64, 16, &aom_highbd_sad64x16x4d_sse2, 12), + make_tuple(16, 64, &aom_highbd_sad16x64x4d_sse2, 12), +#endif + make_tuple(32, 8, &aom_sad32x8x4d_sse2, -1), + make_tuple(8, 32, &aom_sad8x32x4d_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(32, 8, &aom_highbd_sad32x8x4d_sse2, 8), + make_tuple(8, 32, &aom_highbd_sad8x32x4d_sse2, 8), + make_tuple(32, 8, &aom_highbd_sad32x8x4d_sse2, 10), + make_tuple(8, 32, &aom_highbd_sad8x32x4d_sse2, 10), + make_tuple(32, 8, &aom_highbd_sad32x8x4d_sse2, 12), + make_tuple(8, 32, &aom_highbd_sad8x32x4d_sse2, 12), +#endif + make_tuple(16, 4, &aom_sad16x4x4d_sse2, -1), + make_tuple(4, 16, &aom_sad4x16x4d_sse2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(16, 4, &aom_highbd_sad16x4x4d_sse2, 8), + make_tuple(4, 16, &aom_highbd_sad4x16x4d_sse2, 8), + make_tuple(16, 4, &aom_highbd_sad16x4x4d_sse2, 10), + make_tuple(4, 16, &aom_highbd_sad4x16x4d_sse2, 10), + make_tuple(16, 4, &aom_highbd_sad16x4x4d_sse2, 12), + make_tuple(4, 16, &aom_highbd_sad4x16x4d_sse2, 12), +#endif +#endif +}; +INSTANTIATE_TEST_SUITE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests)); + +const SadSkipMxNx4Param 
skip_x4d_sse2_tests[] = { + make_tuple(128, 128, &aom_sad_skip_128x128x4d_sse2, -1), + make_tuple(128, 64, &aom_sad_skip_128x64x4d_sse2, -1), + make_tuple(64, 128, &aom_sad_skip_64x128x4d_sse2, -1), + make_tuple(64, 64, &aom_sad_skip_64x64x4d_sse2, -1), + make_tuple(64, 32, &aom_sad_skip_64x32x4d_sse2, -1), + make_tuple(32, 64, &aom_sad_skip_32x64x4d_sse2, -1), + make_tuple(32, 32, &aom_sad_skip_32x32x4d_sse2, -1), + make_tuple(32, 16, &aom_sad_skip_32x16x4d_sse2, -1), + make_tuple(16, 32, &aom_sad_skip_16x32x4d_sse2, -1), + make_tuple(16, 16, &aom_sad_skip_16x16x4d_sse2, -1), + make_tuple(16, 8, &aom_sad_skip_16x8x4d_sse2, -1), + make_tuple(8, 16, &aom_sad_skip_8x16x4d_sse2, -1), + make_tuple(8, 8, &aom_sad_skip_8x8x4d_sse2, -1), + make_tuple(4, 8, &aom_sad_skip_4x8x4d_sse2, -1), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad_skip_64x16x4d_sse2, -1), + make_tuple(16, 64, &aom_sad_skip_16x64x4d_sse2, -1), + make_tuple(32, 8, &aom_sad_skip_32x8x4d_sse2, -1), + make_tuple(8, 32, &aom_sad_skip_8x32x4d_sse2, -1), + make_tuple(4, 16, &aom_sad_skip_4x16x4d_sse2, -1), +#endif +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_sse2, 8), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_sse2, 8), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_sse2, 8), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_sse2, 8), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_sse2, 8), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_sse2, 8), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_sse2, 8), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_sse2, 8), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_sse2, 8), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_sse2, 8), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_sse2, 8), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_sse2, 8), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_sse2, 8), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_sse2, 8), + 
make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_sse2, 8), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_sse2, 8), +#endif + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_sse2, 10), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_sse2, 10), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_sse2, 10), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_sse2, 10), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_sse2, 10), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_sse2, 10), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_sse2, 10), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_sse2, 10), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_sse2, 10), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_sse2, 10), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_sse2, 10), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_sse2, 10), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_sse2, 10), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_sse2, 10), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_sse2, 10), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_sse2, 10), +#endif + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_sse2, 12), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_sse2, 12), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_sse2, 12), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_sse2, 12), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_sse2, 12), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_sse2, 12), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_sse2, 12), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_sse2, 12), + make_tuple(8, 16, &aom_highbd_sad_skip_8x16x4d_sse2, 12), + make_tuple(8, 8, &aom_highbd_sad_skip_8x8x4d_sse2, 12), + make_tuple(4, 8, &aom_highbd_sad_skip_4x8x4d_sse2, 12), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_sse2, 12), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_sse2, 12), + make_tuple(32, 8, 
&aom_highbd_sad_skip_32x8x4d_sse2, 12), + make_tuple(8, 32, &aom_highbd_sad_skip_8x32x4d_sse2, 12), + make_tuple(4, 16, &aom_highbd_sad_skip_4x16x4d_sse2, 12), +#endif +#endif // CONFIG_AV1_HIGHBITDEPTH +}; +INSTANTIATE_TEST_SUITE_P(SSE2, SADSkipx4Test, + ::testing::ValuesIn(skip_x4d_sse2_tests)); + +const DistWtdSadMxNAvgParam dist_wtd_avg_sse2_tests[] = { + make_tuple(128, 128, &aom_dist_wtd_sad128x128_avg_sse2, -1), + make_tuple(128, 64, &aom_dist_wtd_sad128x64_avg_sse2, -1), + make_tuple(64, 128, &aom_dist_wtd_sad64x128_avg_sse2, -1), + make_tuple(64, 64, &aom_dist_wtd_sad64x64_avg_sse2, -1), + make_tuple(64, 32, &aom_dist_wtd_sad64x32_avg_sse2, -1), + make_tuple(32, 64, &aom_dist_wtd_sad32x64_avg_sse2, -1), + make_tuple(32, 32, &aom_dist_wtd_sad32x32_avg_sse2, -1), + make_tuple(32, 16, &aom_dist_wtd_sad32x16_avg_sse2, -1), + make_tuple(16, 32, &aom_dist_wtd_sad16x32_avg_sse2, -1), + make_tuple(16, 16, &aom_dist_wtd_sad16x16_avg_sse2, -1), + make_tuple(16, 8, &aom_dist_wtd_sad16x8_avg_sse2, -1), + make_tuple(8, 16, &aom_dist_wtd_sad8x16_avg_sse2, -1), + make_tuple(8, 8, &aom_dist_wtd_sad8x8_avg_sse2, -1), + make_tuple(8, 4, &aom_dist_wtd_sad8x4_avg_sse2, -1), + make_tuple(4, 8, &aom_dist_wtd_sad4x8_avg_sse2, -1), + make_tuple(4, 4, &aom_dist_wtd_sad4x4_avg_sse2, -1), +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_dist_wtd_sad64x16_avg_sse2, -1), + make_tuple(16, 64, &aom_dist_wtd_sad16x64_avg_sse2, -1), + make_tuple(32, 8, &aom_dist_wtd_sad32x8_avg_sse2, -1), + make_tuple(8, 32, &aom_dist_wtd_sad8x32_avg_sse2, -1), + make_tuple(16, 4, &aom_dist_wtd_sad16x4_avg_sse2, -1), + make_tuple(4, 16, &aom_dist_wtd_sad4x16_avg_sse2, -1), +#endif +}; +INSTANTIATE_TEST_SUITE_P(sse2, DistWtdSADavgTest, + ::testing::ValuesIn(dist_wtd_avg_sse2_tests)); +#endif // HAVE_SSE2 + +#if HAVE_SSE3 +// Only functions are x3, which do not have tests. +#endif // HAVE_SSE3 + +#if HAVE_SSE4_1 +// Only functions are x8, which do not have tests. 
+#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 +const SadMxNParam avx2_tests[] = { + make_tuple(64, 128, &aom_sad64x128_avx2, -1), + make_tuple(128, 64, &aom_sad128x64_avx2, -1), + make_tuple(128, 128, &aom_sad128x128_avx2, -1), + make_tuple(64, 64, &aom_sad64x64_avx2, -1), + make_tuple(64, 32, &aom_sad64x32_avx2, -1), + make_tuple(32, 64, &aom_sad32x64_avx2, -1), + make_tuple(32, 32, &aom_sad32x32_avx2, -1), + make_tuple(32, 16, &aom_sad32x16_avx2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 8), + make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 10), + make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 12), + make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 8), + make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 10), + make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 12), + make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 8), + make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 10), + make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 12), + make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 8), + make_tuple(16, 16, 
&aom_highbd_sad16x16_avx2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 12), + make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 12), + +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad64x16_avx2, 8), + make_tuple(64, 16, &aom_highbd_sad64x16_avx2, 10), + make_tuple(64, 16, &aom_highbd_sad64x16_avx2, 12), + make_tuple(16, 64, &aom_highbd_sad16x64_avx2, 8), + make_tuple(16, 64, &aom_highbd_sad16x64_avx2, 10), + make_tuple(16, 64, &aom_highbd_sad16x64_avx2, 12), + make_tuple(32, 8, &aom_highbd_sad32x8_avx2, 8), + make_tuple(32, 8, &aom_highbd_sad32x8_avx2, 10), + make_tuple(32, 8, &aom_highbd_sad32x8_avx2, 12), + make_tuple(16, 4, &aom_highbd_sad16x4_avx2, 8), + make_tuple(16, 4, &aom_highbd_sad16x4_avx2, 10), + make_tuple(16, 4, &aom_highbd_sad16x4_avx2, 12), +#endif +#endif +}; +INSTANTIATE_TEST_SUITE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests)); + +const SadSkipMxNParam skip_avx2_tests[] = { + make_tuple(128, 128, &aom_sad_skip_128x128_avx2, -1), + make_tuple(128, 64, &aom_sad_skip_128x64_avx2, -1), + make_tuple(64, 128, &aom_sad_skip_64x128_avx2, -1), + make_tuple(64, 64, &aom_sad_skip_64x64_avx2, -1), + make_tuple(64, 32, &aom_sad_skip_64x32_avx2, -1), + make_tuple(32, 64, &aom_sad_skip_32x64_avx2, -1), + make_tuple(32, 32, &aom_sad_skip_32x32_avx2, -1), + make_tuple(32, 16, &aom_sad_skip_32x16_avx2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad_skip_128x128_avx2, 8), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64_avx2, 8), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128_avx2, 8), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_avx2, 8), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_avx2, 8), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64_avx2, 8), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32_avx2, 8), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_avx2, 8), + make_tuple(16, 32, 
&aom_highbd_sad_skip_16x32_avx2, 8), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_avx2, 8), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_avx2, 8), + + make_tuple(128, 128, &aom_highbd_sad_skip_128x128_avx2, 10), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64_avx2, 10), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128_avx2, 10), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_avx2, 10), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_avx2, 10), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64_avx2, 10), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32_avx2, 10), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_avx2, 10), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32_avx2, 10), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_avx2, 10), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_avx2, 10), + + make_tuple(128, 128, &aom_highbd_sad_skip_128x128_avx2, 12), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64_avx2, 12), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128_avx2, 12), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64_avx2, 12), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32_avx2, 12), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64_avx2, 12), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32_avx2, 12), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16_avx2, 12), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32_avx2, 12), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16_avx2, 12), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8_avx2, 12), + +#if !CONFIG_REALTIME_ONLY + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_avx2, 8), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_avx2, 10), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64_avx2, 12), +#endif +#endif +}; +INSTANTIATE_TEST_SUITE_P(AVX2, SADSkipTest, + ::testing::ValuesIn(skip_avx2_tests)); + +const SadMxNAvgParam avg_avx2_tests[] = { + make_tuple(64, 128, &aom_sad64x128_avg_avx2, -1), + make_tuple(128, 64, &aom_sad128x64_avg_avx2, -1), + make_tuple(128, 128, &aom_sad128x128_avg_avx2, -1), + make_tuple(64, 64, 
&aom_sad64x64_avg_avx2, -1), + make_tuple(64, 32, &aom_sad64x32_avg_avx2, -1), + make_tuple(32, 64, &aom_sad32x64_avg_avx2, -1), + make_tuple(32, 32, &aom_sad32x32_avg_avx2, -1), + make_tuple(32, 16, &aom_sad32x16_avg_avx2, -1), +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 8), + make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 10), + make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 12), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 8), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 10), + make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 12), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 8), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 10), + make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 12), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 12), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 12), + make_tuple(16, 
8, &aom_highbd_sad16x8_avg_avx2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 12), + +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad64x16_avg_avx2, 8), + make_tuple(64, 16, &aom_highbd_sad64x16_avg_avx2, 10), + make_tuple(64, 16, &aom_highbd_sad64x16_avg_avx2, 12), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_avx2, 8), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_avx2, 10), + make_tuple(16, 64, &aom_highbd_sad16x64_avg_avx2, 12), + make_tuple(32, 8, &aom_highbd_sad32x8_avg_avx2, 8), + make_tuple(32, 8, &aom_highbd_sad32x8_avg_avx2, 10), + make_tuple(32, 8, &aom_highbd_sad32x8_avg_avx2, 12), + make_tuple(16, 4, &aom_highbd_sad16x4_avg_avx2, 8), + make_tuple(16, 4, &aom_highbd_sad16x4_avg_avx2, 10), + make_tuple(16, 4, &aom_highbd_sad16x4_avg_avx2, 12), +#endif +#endif +}; +INSTANTIATE_TEST_SUITE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests)); + +const SadSkipMxNx4Param skip_x4d_avx2_tests[] = { + make_tuple(128, 128, &aom_sad_skip_128x128x4d_avx2, -1), + make_tuple(128, 64, &aom_sad_skip_128x64x4d_avx2, -1), + make_tuple(64, 128, &aom_sad_skip_64x128x4d_avx2, -1), + make_tuple(64, 64, &aom_sad_skip_64x64x4d_avx2, -1), + make_tuple(64, 32, &aom_sad_skip_64x32x4d_avx2, -1), + make_tuple(32, 64, &aom_sad_skip_32x64x4d_avx2, -1), + make_tuple(32, 32, &aom_sad_skip_32x32x4d_avx2, -1), + make_tuple(32, 16, &aom_sad_skip_32x16x4d_avx2, -1), + make_tuple(16, 32, &aom_sad_skip_16x32x4d_avx2, -1), + make_tuple(16, 16, &aom_sad_skip_16x16x4d_avx2, -1), + make_tuple(16, 8, &aom_sad_skip_16x8x4d_avx2, -1), + +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_avx2, 8), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_avx2, 8), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_avx2, 8), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_avx2, 8), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_avx2, 8), + make_tuple(32, 64, 
&aom_highbd_sad_skip_32x64x4d_avx2, 8), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_avx2, 8), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_avx2, 8), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_avx2, 8), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_avx2, 8), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_avx2, 8), + + make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_avx2, 10), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_avx2, 10), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_avx2, 10), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_avx2, 10), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_avx2, 10), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_avx2, 10), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_avx2, 10), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_avx2, 10), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_avx2, 10), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_avx2, 10), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_avx2, 10), + + make_tuple(128, 128, &aom_highbd_sad_skip_128x128x4d_avx2, 12), + make_tuple(128, 64, &aom_highbd_sad_skip_128x64x4d_avx2, 12), + make_tuple(64, 128, &aom_highbd_sad_skip_64x128x4d_avx2, 12), + make_tuple(64, 64, &aom_highbd_sad_skip_64x64x4d_avx2, 12), + make_tuple(64, 32, &aom_highbd_sad_skip_64x32x4d_avx2, 12), + make_tuple(32, 64, &aom_highbd_sad_skip_32x64x4d_avx2, 12), + make_tuple(32, 32, &aom_highbd_sad_skip_32x32x4d_avx2, 12), + make_tuple(32, 16, &aom_highbd_sad_skip_32x16x4d_avx2, 12), + make_tuple(16, 32, &aom_highbd_sad_skip_16x32x4d_avx2, 12), + make_tuple(16, 16, &aom_highbd_sad_skip_16x16x4d_avx2, 12), + make_tuple(16, 8, &aom_highbd_sad_skip_16x8x4d_avx2, 12), + +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_avx2, 8), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_avx2, 8), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_avx2, 8), + + make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_avx2, 10), + 
make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_avx2, 10), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_avx2, 10), + + make_tuple(64, 16, &aom_highbd_sad_skip_64x16x4d_avx2, 12), + make_tuple(32, 8, &aom_highbd_sad_skip_32x8x4d_avx2, 12), + make_tuple(16, 64, &aom_highbd_sad_skip_16x64x4d_avx2, 12), +#endif +#endif + +#if !CONFIG_REALTIME_ONLY + make_tuple(64, 16, &aom_sad_skip_64x16x4d_avx2, -1), + make_tuple(32, 8, &aom_sad_skip_32x8x4d_avx2, -1), + + make_tuple(16, 64, &aom_sad_skip_16x64x4d_avx2, -1), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, SADSkipx4Test, + ::testing::ValuesIn(skip_x4d_avx2_tests)); + +const SadMxNx4Param x4d_avx2_tests[] = { + make_tuple(16, 32, &aom_sad16x32x4d_avx2, -1), + make_tuple(16, 16, &aom_sad16x16x4d_avx2, -1), + make_tuple(16, 8, &aom_sad16x8x4d_avx2, -1), + make_tuple(32, 64, &aom_sad32x64x4d_avx2, -1), + make_tuple(32, 32, &aom_sad32x32x4d_avx2, -1), + make_tuple(32, 16, &aom_sad32x16x4d_avx2, -1), + make_tuple(64, 128, &aom_sad64x128x4d_avx2, -1), + make_tuple(64, 64, &aom_sad64x64x4d_avx2, -1), + make_tuple(64, 32, &aom_sad64x32x4d_avx2, -1), + make_tuple(128, 128, &aom_sad128x128x4d_avx2, -1), + make_tuple(128, 64, &aom_sad128x64x4d_avx2, -1), + +#if !CONFIG_REALTIME_ONLY + make_tuple(16, 64, &aom_sad16x64x4d_avx2, -1), + make_tuple(16, 4, &aom_sad16x4x4d_avx2, -1), + make_tuple(32, 8, &aom_sad32x8x4d_avx2, -1), + make_tuple(64, 16, &aom_sad64x16x4d_avx2, -1), +#endif + +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 8), + make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 10), + make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 12), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 8), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 10), + make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 12), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 8), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 10), + make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 12), + 
make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 12), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 12), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 12), + +#if !CONFIG_REALTIME_ONLY + make_tuple(16, 64, &aom_highbd_sad16x64x4d_avx2, 8), + make_tuple(16, 64, &aom_highbd_sad16x64x4d_avx2, 10), + make_tuple(16, 64, &aom_highbd_sad16x64x4d_avx2, 12), + make_tuple(64, 16, &aom_highbd_sad64x16x4d_avx2, 8), + make_tuple(64, 16, &aom_highbd_sad64x16x4d_avx2, 10), + make_tuple(64, 16, &aom_highbd_sad64x16x4d_avx2, 12), + make_tuple(32, 8, &aom_highbd_sad32x8x4d_avx2, 8), + make_tuple(32, 8, &aom_highbd_sad32x8x4d_avx2, 10), + make_tuple(32, 8, &aom_highbd_sad32x8x4d_avx2, 12), + make_tuple(16, 4, &aom_highbd_sad16x4x4d_avx2, 8), + make_tuple(16, 4, &aom_highbd_sad16x4x4d_avx2, 10), + make_tuple(16, 4, 
&aom_highbd_sad16x4x4d_avx2, 12), +#endif +#endif +}; +INSTANTIATE_TEST_SUITE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests)); + +const SadMxNx4Param x3d_avx2_tests[] = { + make_tuple(32, 64, &aom_sad32x64x3d_avx2, -1), + make_tuple(32, 32, &aom_sad32x32x3d_avx2, -1), + make_tuple(32, 16, &aom_sad32x16x3d_avx2, -1), + make_tuple(64, 128, &aom_sad64x128x3d_avx2, -1), + make_tuple(64, 64, &aom_sad64x64x3d_avx2, -1), + make_tuple(64, 32, &aom_sad64x32x3d_avx2, -1), + make_tuple(128, 128, &aom_sad128x128x3d_avx2, -1), + make_tuple(128, 64, &aom_sad128x64x3d_avx2, -1), + +#if !CONFIG_REALTIME_ONLY + make_tuple(32, 8, &aom_sad32x8x3d_avx2, -1), + make_tuple(64, 16, &aom_sad64x16x3d_avx2, -1), +#endif // !CONFIG_REALTIME_ONLY + +#if CONFIG_AV1_HIGHBITDEPTH + make_tuple(128, 128, &aom_highbd_sad128x128x3d_avx2, 8), + make_tuple(128, 128, &aom_highbd_sad128x128x3d_avx2, 10), + make_tuple(128, 128, &aom_highbd_sad128x128x3d_avx2, 12), + make_tuple(128, 64, &aom_highbd_sad128x64x3d_avx2, 8), + make_tuple(128, 64, &aom_highbd_sad128x64x3d_avx2, 10), + make_tuple(128, 64, &aom_highbd_sad128x64x3d_avx2, 12), + make_tuple(64, 128, &aom_highbd_sad64x128x3d_avx2, 8), + make_tuple(64, 128, &aom_highbd_sad64x128x3d_avx2, 10), + make_tuple(64, 128, &aom_highbd_sad64x128x3d_avx2, 12), + make_tuple(64, 64, &aom_highbd_sad64x64x3d_avx2, 8), + make_tuple(64, 64, &aom_highbd_sad64x64x3d_avx2, 10), + make_tuple(64, 64, &aom_highbd_sad64x64x3d_avx2, 12), + make_tuple(64, 32, &aom_highbd_sad64x32x3d_avx2, 8), + make_tuple(64, 32, &aom_highbd_sad64x32x3d_avx2, 10), + make_tuple(64, 32, &aom_highbd_sad64x32x3d_avx2, 12), + make_tuple(32, 64, &aom_highbd_sad32x64x3d_avx2, 8), + make_tuple(32, 64, &aom_highbd_sad32x64x3d_avx2, 10), + make_tuple(32, 64, &aom_highbd_sad32x64x3d_avx2, 12), + make_tuple(32, 32, &aom_highbd_sad32x32x3d_avx2, 8), + make_tuple(32, 32, &aom_highbd_sad32x32x3d_avx2, 10), + make_tuple(32, 32, &aom_highbd_sad32x32x3d_avx2, 12), + make_tuple(32, 16, 
&aom_highbd_sad32x16x3d_avx2, 8), + make_tuple(32, 16, &aom_highbd_sad32x16x3d_avx2, 10), + make_tuple(32, 16, &aom_highbd_sad32x16x3d_avx2, 12), + make_tuple(16, 32, &aom_highbd_sad16x32x3d_avx2, 8), + make_tuple(16, 32, &aom_highbd_sad16x32x3d_avx2, 10), + make_tuple(16, 32, &aom_highbd_sad16x32x3d_avx2, 12), + make_tuple(16, 16, &aom_highbd_sad16x16x3d_avx2, 8), + make_tuple(16, 16, &aom_highbd_sad16x16x3d_avx2, 10), + make_tuple(16, 16, &aom_highbd_sad16x16x3d_avx2, 12), + make_tuple(16, 8, &aom_highbd_sad16x8x3d_avx2, 8), + make_tuple(16, 8, &aom_highbd_sad16x8x3d_avx2, 10), + make_tuple(16, 8, &aom_highbd_sad16x8x3d_avx2, 12), + +#if !CONFIG_REALTIME_ONLY + make_tuple(16, 64, &aom_highbd_sad16x64x3d_avx2, 8), + make_tuple(16, 64, &aom_highbd_sad16x64x3d_avx2, 10), + make_tuple(16, 64, &aom_highbd_sad16x64x3d_avx2, 12), + make_tuple(64, 16, &aom_highbd_sad64x16x3d_avx2, 8), + make_tuple(64, 16, &aom_highbd_sad64x16x3d_avx2, 10), + make_tuple(64, 16, &aom_highbd_sad64x16x3d_avx2, 12), + make_tuple(32, 8, &aom_highbd_sad32x8x3d_avx2, 8), + make_tuple(32, 8, &aom_highbd_sad32x8x3d_avx2, 10), + make_tuple(32, 8, &aom_highbd_sad32x8x3d_avx2, 12), + make_tuple(16, 4, &aom_highbd_sad16x4x3d_avx2, 8), + make_tuple(16, 4, &aom_highbd_sad16x4x3d_avx2, 10), + make_tuple(16, 4, &aom_highbd_sad16x4x3d_avx2, 12), +#endif // !CONFIG_REALTIME_ONLY +#endif // CONFIG_AV1_HIGHBITDEPTH +}; +INSTANTIATE_TEST_SUITE_P(AVX2, SADx3Test, ::testing::ValuesIn(x3d_avx2_tests)); +#endif // HAVE_AVX2 + +} // namespace diff --git a/third_party/aom/test/sb_multipass_test.cc b/third_party/aom/test/sb_multipass_test.cc new file mode 100644 index 0000000000..e27a2c60ee --- /dev/null +++ b/third_party/aom/test/sb_multipass_test.cc @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2020, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <initializer_list> +#include <string> +#include <vector> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/md5_helper.h" +#include "test/util.h" +#include "test/yuv_video_source.h" + +namespace { +class AV1SBMultipassTest + : public ::libaom_test::CodecTestWith2Params<int, bool>, + public ::libaom_test::EncoderTest { + protected: + AV1SBMultipassTest() + : EncoderTest(GET_PARAM(0)), set_cpu_used_(GET_PARAM(1)), + row_mt_(GET_PARAM(2)) { + init_flags_ = AOM_CODEC_USE_PSNR; + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.w = 1280; + cfg.h = 720; + cfg.allow_lowbitdepth = 1; + decoder_ = codec_->CreateDecoder(cfg, 0); + if (decoder_->IsAV1()) { + decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1); + decoder_->Control(AV1_SET_DECODE_TILE_COL, -1); + } + + size_enc_.clear(); + md5_dec_.clear(); + md5_enc_.clear(); + } + ~AV1SBMultipassTest() override { delete decoder_; } + + void SetUp() override { + InitializeConfig(::libaom_test::kTwoPassGood); + + cfg_.g_lag_in_frames = 5; + cfg_.rc_end_usage = AOM_VBR; + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_maxsection_pct = 2000; + + cfg_.rc_max_quantizer = 56; + cfg_.rc_min_quantizer = 0; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + SetTileSize(encoder); + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AV1E_ENABLE_SB_MULTIPASS_UNIT_TEST, use_multipass_); + encoder->Control(AV1E_SET_ROW_MT, row_mt_); + + 
encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + + virtual void SetTileSize(libaom_test::Encoder *encoder) { + encoder->Control(AV1E_SET_TILE_COLUMNS, 1); + encoder->Control(AV1E_SET_TILE_ROWS, 1); + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + size_enc_.push_back(pkt->data.frame.sz); + + ::libaom_test::MD5 md5_enc; + md5_enc.Add(reinterpret_cast<uint8_t *>(pkt->data.frame.buf), + pkt->data.frame.sz); + md5_enc_.push_back(md5_enc.Get()); + + const aom_codec_err_t res = decoder_->DecodeFrame( + reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz); + if (res != AOM_CODEC_OK) { + abort_ = true; + ASSERT_EQ(AOM_CODEC_OK, res); + } + const aom_image_t *img = decoder_->GetDxData().Next(); + + if (img) { + ::libaom_test::MD5 md5_res; + md5_res.Add(img); + md5_dec_.push_back(md5_res.Get()); + } + } + + void DoTest() { + ::libaom_test::YUVVideoSource video( + "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, 640, 480, 30, 1, 0, 6); + cfg_.rc_target_bitrate = 1000; + + // Encode while coding each sb once + use_multipass_ = false; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + std::vector<size_t> single_pass_size_enc; + std::vector<std::string> single_pass_md5_enc; + std::vector<std::string> single_pass_md5_dec; + single_pass_size_enc = size_enc_; + single_pass_md5_enc = md5_enc_; + single_pass_md5_dec = md5_dec_; + size_enc_.clear(); + md5_enc_.clear(); + md5_dec_.clear(); + + // Encode while coding each sb twice + use_multipass_ = true; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + std::vector<size_t> multi_pass_size_enc; + std::vector<std::string> multi_pass_md5_enc; + std::vector<std::string> multi_pass_md5_dec; + multi_pass_size_enc = size_enc_; + multi_pass_md5_enc = md5_enc_; + multi_pass_md5_dec = md5_dec_; + size_enc_.clear(); + md5_enc_.clear(); + md5_dec_.clear(); + + // Check that the vectors are equal. 
+ ASSERT_EQ(single_pass_size_enc, multi_pass_size_enc); + ASSERT_EQ(single_pass_md5_enc, multi_pass_md5_enc); + ASSERT_EQ(single_pass_md5_dec, multi_pass_md5_dec); + } + + bool use_multipass_; + int set_cpu_used_; + bool row_mt_; + ::libaom_test::Decoder *decoder_; + std::vector<size_t> size_enc_; + std::vector<std::string> md5_enc_; + std::vector<std::string> md5_dec_; +}; + +TEST_P(AV1SBMultipassTest, TwoPassMatchTest) { DoTest(); } + +AV1_INSTANTIATE_TEST_SUITE(AV1SBMultipassTest, ::testing::Range(4, 6), + ::testing::Bool()); + +} // namespace diff --git a/third_party/aom/test/sb_qp_sweep_test.cc b/third_party/aom/test/sb_qp_sweep_test.cc new file mode 100644 index 0000000000..6c76a40b2a --- /dev/null +++ b/third_party/aom/test/sb_qp_sweep_test.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2022, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <initializer_list> +#include <memory> +#include <string> +#include <vector> + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/md5_helper.h" +#include "test/util.h" +#include "test/yuv_video_source.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +// Parameters: cpu-used, row-mt. 
+class AV1SBQPSweepTest : public ::libaom_test::CodecTestWith2Params<int, bool>, + public ::libaom_test::EncoderTest { + protected: + AV1SBQPSweepTest() + : EncoderTest(GET_PARAM(0)), set_cpu_used_(GET_PARAM(1)), + row_mt_(GET_PARAM(2)) { + init_flags_ = AOM_CODEC_USE_PSNR; + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.w = 1280; + cfg.h = 720; + cfg.allow_lowbitdepth = 1; + decoder_ = + std::unique_ptr<::libaom_test::Decoder>(codec_->CreateDecoder(cfg, 0)); + } + ~AV1SBQPSweepTest() override = default; + + void SetUp() override { + InitializeConfig(::libaom_test::kTwoPassGood); + + ASSERT_NE(decoder_, nullptr); + if (decoder_->IsAV1()) { + decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1); + decoder_->Control(AV1_SET_DECODE_TILE_COL, -1); + } + + cfg_.g_lag_in_frames = 5; + cfg_.rc_end_usage = AOM_Q; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + SetTileSize(encoder); + encoder->Control(AOME_SET_CPUUSED, set_cpu_used_); + encoder->Control(AV1E_ENABLE_SB_QP_SWEEP, use_sb_sweep_); + encoder->Control(AV1E_SET_ROW_MT, row_mt_); + + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } + } + + virtual void SetTileSize(libaom_test::Encoder *encoder) { + encoder->Control(AV1E_SET_TILE_COLUMNS, 1); + encoder->Control(AV1E_SET_TILE_ROWS, 1); + } + + void BeginPassHook(unsigned int) override { + psnr_ = 0.0; + nframes_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + double GetAverageFrameSize() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + sum_frame_size_ += pkt->data.frame.sz; + + const aom_codec_err_t res = 
decoder_->DecodeFrame( + reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz); + if (res != AOM_CODEC_OK) { + abort_ = true; + ASSERT_EQ(AOM_CODEC_OK, res); + } + } + + void DoTest() { + ::libaom_test::YUVVideoSource video( + "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, 640, 480, 30, 1, 0, 6); + cfg_.rc_target_bitrate = 1000; + + // Encode without sb_qp_sweep + use_sb_sweep_ = false; + sum_frame_size_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_1 = GetAveragePsnr(); + const size_t avg_frame_size_1 = sum_frame_size_ / nframes_; + + // Encode with sb_qp_sweep + use_sb_sweep_ = true; + sum_frame_size_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + const double psnr_2 = GetAveragePsnr(); + const size_t avg_frame_size_2 = sum_frame_size_ / nframes_; + + if (psnr_1 >= psnr_2) { + ASSERT_GE(avg_frame_size_1, avg_frame_size_2); + } + if (avg_frame_size_1 <= avg_frame_size_2) { + ASSERT_LE(psnr_1, psnr_2); + } + } + + bool use_sb_sweep_; + int set_cpu_used_; + bool row_mt_; + double psnr_; + unsigned int nframes_; + size_t sum_frame_size_; + std::unique_ptr<::libaom_test::Decoder> decoder_; +}; + +TEST_P(AV1SBQPSweepTest, SweepMatchTest) { DoTest(); } + +AV1_INSTANTIATE_TEST_SUITE(AV1SBQPSweepTest, ::testing::Range(4, 6), + ::testing::Bool()); + +} // namespace diff --git a/third_party/aom/test/scalability_test.cc b/third_party/aom/test/scalability_test.cc new file mode 100644 index 0000000000..12cb03cac4 --- /dev/null +++ b/third_party/aom/test/scalability_test.cc @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +const int kCpuUsed = 8; +const int kBaseLayerQp = 55; +const int kEnhancementLayerQp = 20; + +class ScalabilityTest + : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>, + public ::libaom_test::EncoderTest { + protected: + ScalabilityTest() : EncoderTest(GET_PARAM(0)) {} + ~ScalabilityTest() override = default; + + void SetUp() override { + InitializeConfig(GET_PARAM(1)); + num_spatial_layers_ = 2; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, kCpuUsed); + encoder->Control(AOME_SET_NUMBER_SPATIAL_LAYERS, num_spatial_layers_); + } + if (video->frame() % num_spatial_layers_) { + frame_flags_ = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY; + encoder->Control(AOME_SET_SPATIAL_LAYER_ID, 1); + encoder->Control(AOME_SET_CQ_LEVEL, kEnhancementLayerQp); + } else { + frame_flags_ = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF | + AOM_EFLAG_NO_UPD_ENTROPY; + encoder->Control(AOME_SET_SPATIAL_LAYER_ID, 0); + encoder->Control(AOME_SET_CQ_LEVEL, kBaseLayerQp); + } + } + + void DoTest(int num_spatial_layers) { + num_spatial_layers_ = num_spatial_layers; + cfg_.rc_end_usage = AOM_Q; + 
cfg_.g_lag_in_frames = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 18); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + int num_spatial_layers_; +}; + +TEST_P(ScalabilityTest, TestNoMismatch2SpatialLayers) { DoTest(2); } + +TEST_P(ScalabilityTest, TestNoMismatch3SpatialLayers) { DoTest(3); } + +AV1_INSTANTIATE_TEST_SUITE(ScalabilityTest, + ::testing::Values(::libaom_test::kRealTime)); + +} // namespace diff --git a/third_party/aom/test/scan_test.cc b/third_party/aom/test/scan_test.cc new file mode 100644 index 0000000000..571658ee0a --- /dev/null +++ b/third_party/aom/test/scan_test.cc @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "av1/common/scan.h" +#include "av1/common/txb_common.h" +#include "test/av1_txfm_test.h" + +static int scan_test(const int16_t *scan, const int16_t *iscan, int si, int r, + int c, int h) { + if (iscan[c * h + r] != si || scan[si] != c * h + r) { + printf("r %d c %d ref_iscan %d iscan %d ref_scan %d scan %d\n", r, c, si, + iscan[c * h + r], c * h + r, scan[si]); + return 1; + } else { + return 0; + } +} + +int scan_order_test(const SCAN_ORDER *scan_order, int w, int h, + SCAN_MODE mode) { + const int16_t *scan = scan_order->scan; + const int16_t *iscan = scan_order->iscan; + int dim = w + h - 1; + if (mode == SCAN_MODE_ZIG_ZAG) { + int si = 0; + for (int i = 0; i < dim; ++i) { + if (i % 2 == 0) { + for (int c = 0; c < w; ++c) { + int r = i - c; + if (r >= 0 && r < h) { + if (scan_test(scan, iscan, si, r, c, h)) return 1; + ++si; + } + } + } else { + for (int r = 0; r < h; ++r) { + int c = i - r; + if (c >= 0 && c < w) { + if (scan_test(scan, iscan, si, r, c, h)) return 1; + ++si; + } + } + } + } + } else if (mode == SCAN_MODE_COL_DIAG) { + int si = 0; + for (int i = 0; i < dim; ++i) { + for (int c = 0; c < w; ++c) { + int r = i - c; + if (r >= 0 && r < h) { + if (scan_test(scan, iscan, si, r, c, h)) return 1; + ++si; + } + } + } + } else if (mode == SCAN_MODE_ROW_DIAG) { + int si = 0; + for (int i = 0; i < dim; ++i) { + for (int r = 0; r < h; ++r) { + int c = i - r; + if (c >= 0 && c < w) { + if (scan_test(scan, iscan, si, r, c, h)) return 1; + ++si; + } + } + } + } else if (mode == SCAN_MODE_ROW_1D) { + int si = 0; + for (int r = 0; r < h; ++r) { + for (int c = 0; c < w; ++c) { + if (scan_test(scan, iscan, si, r, c, h)) return 1; + ++si; + } + } + } else { + assert(mode == SCAN_MODE_COL_1D); + int si = 0; + for (int c = 0; c < w; ++c) { + for (int r = 0; r < h; ++r) { + if (scan_test(scan, iscan, si, r, c, h)) return 1; + ++si; + } + } + } + return 0; +} + +TEST(Av1ScanTest, 
Dependency) { + for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) { + const int org_rows = tx_size_high[(TX_SIZE)tx_size]; + const int org_cols = tx_size_wide[(TX_SIZE)tx_size]; + const int rows = get_txb_high((TX_SIZE)tx_size); + const int cols = get_txb_wide((TX_SIZE)tx_size); + for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) { + if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(tx_size), + static_cast<TX_TYPE>(tx_type)) == + false) { + continue; + } + SCAN_MODE scan_mode; + TX_CLASS tx_class = tx_type_to_class[(TX_TYPE)tx_type]; + if (tx_class == TX_CLASS_2D) { + if (rows == cols) { + scan_mode = SCAN_MODE_ZIG_ZAG; + } else if (rows > cols) { + scan_mode = SCAN_MODE_ROW_DIAG; + } else { + scan_mode = SCAN_MODE_COL_DIAG; + } + } else if (tx_class == TX_CLASS_VERT) { + scan_mode = SCAN_MODE_ROW_1D; + } else { + assert(tx_class == TX_CLASS_HORIZ); + scan_mode = SCAN_MODE_COL_1D; + } + const SCAN_ORDER *scan_order = + get_default_scan((TX_SIZE)tx_size, (TX_TYPE)tx_type); + ASSERT_EQ(scan_order_test(scan_order, cols, rows, scan_mode), 0) + << "scan mismatch tx_class " << tx_class << " tx_type " << tx_type + << " tx_w " << org_cols << " tx_h " << org_rows << " scan_mode " + << scan_mode << "\n"; + } + } +} diff --git a/third_party/aom/test/screen_content_test.cc b/third_party/aom/test/screen_content_test.cc new file mode 100644 index 0000000000..974c50b3c6 --- /dev/null +++ b/third_party/aom/test/screen_content_test.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2020, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#include "aom/aom_codec.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/y4m_video_source.h" +#include "test/util.h" + +namespace { +// This class is used to validate if screen_content_tools are turned on +// appropriately. +class ScreenContentToolsTestLarge + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, + aom_rc_mode>, + public ::libaom_test::EncoderTest { + protected: + ScreenContentToolsTestLarge() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + rc_end_usage_(GET_PARAM(2)) { + is_screen_content_violated_ = true; + tune_content_ = AOM_CONTENT_DEFAULT; + } + ~ScreenContentToolsTestLarge() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = rc_end_usage_; + cfg_.g_threads = 1; + cfg_.g_lag_in_frames = 35; + cfg_.rc_target_bitrate = 1000; + cfg_.g_profile = 0; + } + + bool DoDecode() const override { return true; } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 5); + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AV1E_SET_TUNE_CONTENT, tune_content_); + } + } + + bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) override { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + if (AOM_CODEC_OK == res_dec) { + aom_codec_ctx_t *ctx_dec = decoder->GetDecoder(); + aom_screen_content_tools_info sc_info; + + AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_SCREEN_CONTENT_TOOLS_INFO, + &sc_info); + if 
(sc_info.allow_screen_content_tools == 1) { + is_screen_content_violated_ = false; + } + } + return AOM_CODEC_OK == res_dec; + } + + ::libaom_test::TestMode encoding_mode_; + bool is_screen_content_violated_; + int tune_content_; + aom_rc_mode rc_end_usage_; +}; + +TEST_P(ScreenContentToolsTestLarge, ScreenContentToolsTest) { + // force screen content tools on + ::libaom_test::Y4mVideoSource video_nonsc("park_joy_90p_8_444.y4m", 0, 1); + cfg_.g_profile = 1; + tune_content_ = AOM_CONTENT_SCREEN; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video_nonsc)); + ASSERT_EQ(is_screen_content_violated_, false) + << "Failed for tune_content_ = AOM_CONTENT_SCREEN"; + + // Don't force screen content, however as the input is screen content + // allow_screen_content_tools should still be turned on + ::libaom_test::Y4mVideoSource video_sc("desktop_credits.y4m", 0, 1); + cfg_.g_profile = 1; + is_screen_content_violated_ = true; + tune_content_ = AOM_CONTENT_DEFAULT; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video_sc)); + ASSERT_EQ(is_screen_content_violated_, false) + << "Failed detection of screen content"; + + // TODO(anyone): Enable below test once low resolution screen content + // detection issues are fixed. 
+ // low resolution test + // ::libaom_test::Y4mVideoSource video_sc("screendata.y4m", 0, 1); + // cfg_.g_profile = 0; + // is_screen_content_violated_ = true; + // tune_content_ = AOM_CONTENT_DEFAULT; + // ASSERT_NO_FATAL_FAILURE(RunLoop(&video_sc)); + // ASSERT_EQ(is_screen_content_violated_, false) + // << "Failed detection of screen content(lowres)"; +} + +AV1_INSTANTIATE_TEST_SUITE(ScreenContentToolsTestLarge, + ::testing::Values(::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood), + ::testing::Values(AOM_Q)); + +class ScreenContentToolsMultiThreadTestLarge + : public ScreenContentToolsTestLarge {}; + +TEST_P(ScreenContentToolsMultiThreadTestLarge, ScreenContentToolsTest) { + // Don't force screen content, however as the input is screen content + // allow_screen_content_tools should still be turned on even with + // multi-threaded encoding. + ::libaom_test::Y4mVideoSource video_sc("desktop_credits.y4m", 0, 10); + cfg_.g_profile = 1; + cfg_.g_threads = 4; + is_screen_content_violated_ = true; + tune_content_ = AOM_CONTENT_DEFAULT; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video_sc)); + ASSERT_EQ(is_screen_content_violated_, false) + << "Failed detection of screen content"; +} + +AV1_INSTANTIATE_TEST_SUITE(ScreenContentToolsMultiThreadTestLarge, + ::testing::Values(::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood), + ::testing::Values(AOM_Q)); +} // namespace diff --git a/third_party/aom/test/segment_binarization_sync.cc b/third_party/aom/test/segment_binarization_sync.cc new file mode 100644 index 0000000000..bd8cf11410 --- /dev/null +++ b/third_party/aom/test/segment_binarization_sync.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" + +using libaom_test::ACMRandom; + +extern "C" { +int av1_neg_interleave(int x, int ref, int max); +int av1_neg_deinterleave(int diff, int ref, int max); +} + +namespace { + +struct Segment { + int id; + int pred; + int last_id; +}; + +Segment GenerateSegment(int seed) { + static const int MAX_SEGMENTS = 8; + + ACMRandom rnd_(seed); + + Segment segment; + const int last_segid = rnd_.PseudoUniform(MAX_SEGMENTS); + segment.last_id = last_segid; + segment.pred = rnd_.PseudoUniform(MAX_SEGMENTS); + segment.id = rnd_.PseudoUniform(last_segid + 1); + + return segment; +} + +// Try to reveal a mismatch between segment binarization and debinarization +TEST(SegmentBinarizationSync, SearchForBinarizationMismatch) { + const int count_tests = 1000; + const int seed_init = 4321; + + for (int i = 0; i < count_tests; ++i) { + const Segment seg = GenerateSegment(seed_init + i); + + const int max_segid = seg.last_id + 1; + const int seg_diff = av1_neg_interleave(seg.id, seg.pred, max_segid); + const int decoded_segid = + av1_neg_deinterleave(seg_diff, seg.pred, max_segid); + + ASSERT_EQ(decoded_segid, seg.id); + } +} + +} // namespace diff --git a/third_party/aom/test/selfguided_filter_test.cc b/third_party/aom/test/selfguided_filter_test.cc new file mode 100644 index 0000000000..3dd513b6e0 --- /dev/null +++ b/third_party/aom/test/selfguided_filter_test.cc @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <ctime> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/av1_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" + +#include "aom_ports/aom_timer.h" +#include "av1/common/mv.h" +#include "av1/common/restoration.h" + +namespace { + +using libaom_test::ACMRandom; +using std::make_tuple; +using std::tuple; + +typedef int (*SgrFunc)(const uint8_t *dat8, int width, int height, int stride, + int eps, const int *xqd, uint8_t *dst8, int dst_stride, + int32_t *tmpbuf, int bit_depth, int highbd); + +// Test parameter list: +// <tst_fun_> +typedef tuple<SgrFunc> FilterTestParam; + +class AV1SelfguidedFilterTest + : public ::testing::TestWithParam<FilterTestParam> { + public: + ~AV1SelfguidedFilterTest() override = default; + void SetUp() override {} + + protected: + void RunSpeedTest() { + tst_fun_ = GET_PARAM(0); + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; + const int width = 256, height = 256, stride = 288, out_stride = 288; + const int NUM_ITERS = 2000; + int i, j, k; + + uint8_t *input_ = + (uint8_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint8_t)); + ASSERT_NE(input_, nullptr); + uint8_t *output_ = (uint8_t *)aom_memalign( + 32, out_stride * (height + 32) * sizeof(uint8_t)); + ASSERT_NE(output_, nullptr); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); + ASSERT_NE(tmpbuf, 
nullptr); + uint8_t *input = input_ + stride * 16 + 16; + uint8_t *output = output_ + out_stride * 16 + 16; + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + for (i = -16; i < height + 16; ++i) + for (j = -16; j < width + 16; ++j) + input[i * stride + j] = rnd.Rand16() & 0xFF; + + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; + // Fix a parameter set, since the speed depends slightly on r. + // Change this to test different combinations of values of r. + int eps = 15; + + av1_loop_restoration_precal(); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (i = 0; i < NUM_ITERS; ++i) { + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint8_t *input_p = input + k * stride + j; + uint8_t *output_p = output + k * out_stride + j; + const int ret_c = av1_apply_selfguided_restoration_c( + input_p, w, h, stride, eps, xqd, output_p, out_stride, tmpbuf, 8, + 0); + ASSERT_EQ(ret_c, 0); + } + } + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < NUM_ITERS; ++i) { + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint8_t *input_p = input + k * stride + j; + uint8_t *output_p = output + k * out_stride + j; + const int ret_tst = tst_fun_(input_p, w, h, stride, eps, xqd, + output_p, out_stride, tmpbuf, 8, 0); + ASSERT_EQ(ret_tst, 0); + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, 
tst_time) + << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; + + aom_free(input_); + aom_free(output_); + aom_free(tmpbuf); + } + + void RunCorrectnessTest() { + tst_fun_ = GET_PARAM(0); + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; + // Set the maximum width/height to test here. We actually test a small + // range of sizes *up to* this size, so that we can check, eg., + // the behaviour on tiles which are not a multiple of 4 wide. + const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; + const int NUM_ITERS = 81; + int i, j, k; + + uint8_t *input_ = + (uint8_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint8_t)); + ASSERT_NE(input_, nullptr); + uint8_t *output_ = (uint8_t *)aom_memalign( + 32, out_stride * (max_h + 32) * sizeof(uint8_t)); + ASSERT_NE(output_, nullptr); + uint8_t *output2_ = (uint8_t *)aom_memalign( + 32, out_stride * (max_h + 32) * sizeof(uint8_t)); + ASSERT_NE(output2_, nullptr); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); + ASSERT_NE(tmpbuf, nullptr); + + uint8_t *input = input_ + stride * 16 + 16; + uint8_t *output = output_ + out_stride * 16 + 16; + uint8_t *output2 = output2_ + out_stride * 16 + 16; + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + av1_loop_restoration_precal(); + + for (i = 0; i < NUM_ITERS; ++i) { + for (j = -16; j < max_h + 16; ++j) + for (k = -16; k < max_w + 16; ++k) + input[j * stride + k] = rnd.Rand16() & 0xFF; + + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; + int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); + + // Test various tile sizes around 256x256 + int test_w = max_w - (i / 9); + int test_h = max_h - (i % 9); + + for (k = 0; k < test_h; k += pu_height) + for (j 
= 0; j < test_w; j += pu_width) { + int w = AOMMIN(pu_width, test_w - j); + int h = AOMMIN(pu_height, test_h - k); + uint8_t *input_p = input + k * stride + j; + uint8_t *output_p = output + k * out_stride + j; + uint8_t *output2_p = output2 + k * out_stride + j; + const int ret_tst = tst_fun_(input_p, w, h, stride, eps, xqd, + output_p, out_stride, tmpbuf, 8, 0); + ASSERT_EQ(ret_tst, 0); + const int ret_c = av1_apply_selfguided_restoration_c( + input_p, w, h, stride, eps, xqd, output2_p, out_stride, tmpbuf, 8, + 0); + ASSERT_EQ(ret_c, 0); + } + + for (j = 0; j < test_h; ++j) + for (k = 0; k < test_w; ++k) { + ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); + } + } + + aom_free(input_); + aom_free(output_); + aom_free(output2_); + aom_free(tmpbuf); + } + + private: + SgrFunc tst_fun_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1SelfguidedFilterTest); + +TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); } +TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1SelfguidedFilterTest, + ::testing::Values(av1_apply_selfguided_restoration_sse4_1)); +#endif + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1SelfguidedFilterTest, + ::testing::Values(av1_apply_selfguided_restoration_avx2)); +#endif + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1SelfguidedFilterTest, + ::testing::Values(av1_apply_selfguided_restoration_neon)); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +// Test parameter list: +// <tst_fun_, bit_depth> +typedef tuple<SgrFunc, int> HighbdFilterTestParam; + +class AV1HighbdSelfguidedFilterTest + : public ::testing::TestWithParam<HighbdFilterTestParam> { + public: + ~AV1HighbdSelfguidedFilterTest() override = default; + void SetUp() override {} + + protected: + void RunSpeedTest() { + tst_fun_ = GET_PARAM(0); + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; + const int 
width = 256, height = 256, stride = 288, out_stride = 288; + const int NUM_ITERS = 2000; + int i, j, k; + int bit_depth = GET_PARAM(1); + int mask = (1 << bit_depth) - 1; + + uint16_t *input_ = + (uint16_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint16_t)); + ASSERT_NE(input_, nullptr); + uint16_t *output_ = (uint16_t *)aom_memalign( + 32, out_stride * (height + 32) * sizeof(uint16_t)); + ASSERT_NE(output_, nullptr); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); + ASSERT_NE(tmpbuf, nullptr); + uint16_t *input = input_ + stride * 16 + 16; + uint16_t *output = output_ + out_stride * 16 + 16; + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + for (i = -16; i < height + 16; ++i) + for (j = -16; j < width + 16; ++j) + input[i * stride + j] = rnd.Rand16() & mask; + + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; + // Fix a parameter set, since the speed depends slightly on r. + // Change this to test different combinations of values of r. 
+ int eps = 15; + + av1_loop_restoration_precal(); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (i = 0; i < NUM_ITERS; ++i) { + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint16_t *input_p = input + k * stride + j; + uint16_t *output_p = output + k * out_stride + j; + av1_apply_selfguided_restoration_c( + CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1); + } + } + aom_usec_timer_mark(&ref_timer); + const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); + + aom_usec_timer tst_timer; + aom_usec_timer_start(&tst_timer); + for (i = 0; i < NUM_ITERS; ++i) { + for (k = 0; k < height; k += pu_height) + for (j = 0; j < width; j += pu_width) { + int w = AOMMIN(pu_width, width - j); + int h = AOMMIN(pu_height, height - k); + uint16_t *input_p = input + k * stride + j; + uint16_t *output_p = output + k * out_stride + j; + tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, + 1); + } + } + aom_usec_timer_mark(&tst_timer); + const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); + + std::cout << "[ ] C time = " << ref_time / 1000 + << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; + + EXPECT_GT(ref_time, tst_time) + << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than " + "C.\n" + << "C time: " << ref_time << " us\n" + << "SIMD time: " << tst_time << " us\n"; + + aom_free(input_); + aom_free(output_); + aom_free(tmpbuf); + } + + void RunCorrectnessTest() { + tst_fun_ = GET_PARAM(0); + const int pu_width = RESTORATION_PROC_UNIT_SIZE; + const int pu_height = RESTORATION_PROC_UNIT_SIZE; + // Set the maximum width/height to test here. 
We actually test a small + // range of sizes *up to* this size, so that we can check, eg., + // the behaviour on tiles which are not a multiple of 4 wide. + const int max_w = 260, max_h = 260, stride = 672, out_stride = 672; + const int NUM_ITERS = 81; + int i, j, k; + int bit_depth = GET_PARAM(1); + int mask = (1 << bit_depth) - 1; + + uint16_t *input_ = + (uint16_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint16_t)); + ASSERT_NE(input_, nullptr); + uint16_t *output_ = (uint16_t *)aom_memalign( + 32, out_stride * (max_h + 32) * sizeof(uint16_t)); + ASSERT_NE(output_, nullptr); + uint16_t *output2_ = (uint16_t *)aom_memalign( + 32, out_stride * (max_h + 32) * sizeof(uint16_t)); + ASSERT_NE(output2_, nullptr); + int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE); + ASSERT_NE(tmpbuf, nullptr); + + uint16_t *input = input_ + stride * 16 + 16; + uint16_t *output = output_ + out_stride * 16 + 16; + uint16_t *output2 = output2_ + out_stride * 16 + 16; + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + av1_loop_restoration_precal(); + + for (i = 0; i < NUM_ITERS; ++i) { + for (j = -16; j < max_h + 16; ++j) + for (k = -16; k < max_w + 16; ++k) + input[j * stride + k] = rnd.Rand16() & mask; + + int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - + SGRPROJ_PRJ_MIN0), + SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - + SGRPROJ_PRJ_MIN1) }; + int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS); + + // Test various tile sizes around 256x256 + int test_w = max_w - (i / 9); + int test_h = max_h - (i % 9); + + for (k = 0; k < test_h; k += pu_height) + for (j = 0; j < test_w; j += pu_width) { + int w = AOMMIN(pu_width, test_w - j); + int h = AOMMIN(pu_height, test_h - k); + uint16_t *input_p = input + k * stride + j; + uint16_t *output_p = output + k * out_stride + j; + uint16_t *output2_p = output2 + k * out_stride + j; + tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + 
CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, + 1); + av1_apply_selfguided_restoration_c( + CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd, + CONVERT_TO_BYTEPTR(output2_p), out_stride, tmpbuf, bit_depth, 1); + } + + for (j = 0; j < test_h; ++j) + for (k = 0; k < test_w; ++k) + ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]); + } + + aom_free(input_); + aom_free(output_); + aom_free(output2_); + aom_free(tmpbuf); + } + + private: + SgrFunc tst_fun_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdSelfguidedFilterTest); + +TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); } +TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); } + +#if HAVE_SSE4_1 +const int highbd_params_sse4_1[] = { 8, 10, 12 }; +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1HighbdSelfguidedFilterTest, + ::testing::Combine( + ::testing::Values(av1_apply_selfguided_restoration_sse4_1), + ::testing::ValuesIn(highbd_params_sse4_1))); +#endif + +#if HAVE_AVX2 +const int highbd_params_avx2[] = { 8, 10, 12 }; +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1HighbdSelfguidedFilterTest, + ::testing::Combine(::testing::Values(av1_apply_selfguided_restoration_avx2), + ::testing::ValuesIn(highbd_params_avx2))); +#endif + +#if HAVE_NEON +const int highbd_params_neon[] = { 8, 10, 12 }; +INSTANTIATE_TEST_SUITE_P( + NEON, AV1HighbdSelfguidedFilterTest, + ::testing::Combine(::testing::Values(av1_apply_selfguided_restoration_neon), + ::testing::ValuesIn(highbd_params_neon))); +#endif +#endif // CONFIG_AV1_HIGHBITDEPTH +} // namespace diff --git a/third_party/aom/test/set_maps.sh b/third_party/aom/test/set_maps.sh new file mode 100755 index 0000000000..b79357a2b8 --- /dev/null +++ b/third_party/aom/test/set_maps.sh @@ -0,0 +1,52 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. 
All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests the libaom set_maps example. To add new tests to this file, +## do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to set_maps_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required, and set_maps must exist in +# $LIBAOM_BIN_PATH. +set_maps_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH." + return 1 + fi + if [ -z "$(aom_tool_path set_maps)" ]; then + elog "set_maps not found. It must exist in LIBAOM_BIN_PATH or its parent." + return 1 + fi +} + +# Runs set_maps using the codec specified by $1. 
+set_maps() { + local encoder="$(aom_tool_path set_maps)" + local codec="$1" + local output_file="${AOM_TEST_OUTPUT_DIR}/set_maps_${codec}.ivf" + + eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ + "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \ + ${devnull} || return 1 + + [ -e "${output_file}" ] || return 1 +} + +set_maps_av1() { + if [ "$(av1_encode_available)" = "yes" ]; then + set_maps av1 || return 1 + fi +} + +set_maps_tests="set_maps_av1" + +run_tests set_maps_verify_environment "${set_maps_tests}" diff --git a/third_party/aom/test/sharpness_test.cc b/third_party/aom/test/sharpness_test.cc new file mode 100644 index 0000000000..64465c88eb --- /dev/null +++ b/third_party/aom/test/sharpness_test.cc @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <unordered_map> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" + +namespace { +const unsigned int kFrames = 10; +const int kBitrate = 500; +const unsigned int kCqLevel = 18; + +// List of psnr thresholds for different test combinations +// keys: test-mode, cpu-used, sharpness. 
+const std::unordered_map< + int, std::unordered_map<int, std::unordered_map<int, double>>> + kPsnrThreshold = { { static_cast<int>(::libaom_test::kTwoPassGood), + { { 2, { { 2, 37.6 }, { 5, 37.6 } } }, + { 4, { { 2, 37.5 }, { 5, 37.5 } } }, + { 6, { { 2, 37.5 }, { 5, 37.5 } } } } }, + { static_cast<int>(::libaom_test::kAllIntra), + { { 3, { { 2, 42.2 }, { 5, 42.2 } } }, + { 6, { { 2, 41.8 }, { 4, 41.9 }, { 5, 41.9 } } }, + { 9, { { 2, 41.0 }, { 5, 41.0 } } } } } }; + +// This class is used to test sharpness parameter configured through control +// call using AOME_SET_SHARPNESS for different encoder configurations. +class SharpnessTest + : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int, + int>, + public ::libaom_test::EncoderTest { + protected: + SharpnessTest() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + cpu_used_(GET_PARAM(2)), sharpness_level_(GET_PARAM(3)), psnr_(0.0), + nframes_(0) {} + + ~SharpnessTest() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + if (encoding_mode_ == ::libaom_test::kTwoPassGood) { + cfg_.rc_target_bitrate = kBitrate; + cfg_.g_lag_in_frames = 5; + } + } + + void BeginPassHook(unsigned int) override { + psnr_ = 0.0; + nframes_ = 0; + } + + void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) override { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + encoder->Control(AOME_SET_SHARPNESS, sharpness_level_); + if (encoding_mode_ == ::libaom_test::kTwoPassGood) { + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); + } else if (encoding_mode_ == ::libaom_test::kAllIntra) { + encoder->Control(AOME_SET_CQ_LEVEL, kCqLevel); + } + } + } + + double GetAveragePsnr() const { + if (nframes_) 
return psnr_ / nframes_; + return 0.0; + } + + double GetPsnrThreshold() { + return kPsnrThreshold.at(encoding_mode_).at(cpu_used_).at(sharpness_level_); + } + + void DoTest() { + init_flags_ = AOM_CODEC_USE_PSNR; + + std::unique_ptr<libaom_test::VideoSource> video( + new libaom_test::Y4mVideoSource("paris_352_288_30.y4m", 0, kFrames)); + ASSERT_NE(video, nullptr); + + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); + const double psnr = GetAveragePsnr(); + EXPECT_GT(psnr, GetPsnrThreshold()) + << "encoding mode = " << encoding_mode_ << ", cpu used = " << cpu_used_ + << ", sharpness level = " << sharpness_level_; + } + + private: + const libaom_test::TestMode encoding_mode_; + const int cpu_used_; + const int sharpness_level_; + double psnr_; + unsigned int nframes_; +}; + +class SharpnessTestLarge : public SharpnessTest {}; + +class SharpnessAllIntraTest : public SharpnessTest {}; + +class SharpnessAllIntraTestLarge : public SharpnessTest {}; + +TEST_P(SharpnessTestLarge, SharpnessPSNRTest) { DoTest(); } + +TEST_P(SharpnessAllIntraTest, SharpnessPSNRTest) { DoTest(); } + +TEST_P(SharpnessAllIntraTestLarge, SharpnessPSNRTest) { DoTest(); } + +AV1_INSTANTIATE_TEST_SUITE(SharpnessTestLarge, + ::testing::Values(::libaom_test::kTwoPassGood), + ::testing::Values(2, 4, 6), // cpu_used + ::testing::Values(2, 5)); // sharpness level + +AV1_INSTANTIATE_TEST_SUITE(SharpnessAllIntraTest, + ::testing::Values(::libaom_test::kAllIntra), + ::testing::Values(6), // cpu_used + ::testing::Values(4)); // sharpness level + +AV1_INSTANTIATE_TEST_SUITE(SharpnessAllIntraTestLarge, + ::testing::Values(::libaom_test::kAllIntra), + ::testing::Values(3, 6, 9), // cpu_used + ::testing::Values(2, 5)); // sharpness level +} // namespace diff --git a/third_party/aom/test/simd_avx2_test.cc b/third_party/aom/test/simd_avx2_test.cc new file mode 100644 index 0000000000..8a012bff88 --- /dev/null +++ b/third_party/aom/test/simd_avx2_test.cc @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2016, Alliance for 
Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#define ARCH AVX2 +#define ARCH_POSTFIX(name) name##_avx2 +#define SIMD_NAMESPACE simd_test_avx2 +#include "test/simd_impl.h" diff --git a/third_party/aom/test/simd_cmp_avx2.cc b/third_party/aom/test/simd_cmp_avx2.cc new file mode 100644 index 0000000000..cda632bcdf --- /dev/null +++ b/third_party/aom/test/simd_cmp_avx2.cc @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#define ARCH AVX2 +#define ARCH_POSTFIX(name) name##_avx2 +#define SIMD_NAMESPACE simd_test_avx2 +#include "test/simd_cmp_impl.h" diff --git a/third_party/aom/test/simd_cmp_impl.h b/third_party/aom/test/simd_cmp_impl.h new file mode 100644 index 0000000000..cf85a471cd --- /dev/null +++ b/third_party/aom/test/simd_cmp_impl.h @@ -0,0 +1,2175 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <assert.h> +#include <string> + +#include "config/aom_dsp_rtcd.h" + +#include "test/acm_random.h" +// Inlining not forced for the compiler due to some tests calling +// SIMD_INLINE functions via function pointers +#undef SIMD_INLINE +#define SIMD_INLINE static inline +#include "aom_dsp/aom_simd.h" +#include "aom_dsp/simd/v256_intrinsics_c.h" + +// Machine tuned code goes into this file. This file is included from +// simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc which define the macros +// ARCH (=neon, sse2, ssse3, etc), SIMD_NAMESPACE and ARCH_POSTFIX(). + +#ifdef _MSC_VER +// Disable "value of intrinsic immediate argument 'value' is out of range +// 'lowerbound - upperbound'" warning. Visual Studio emits this warning though +// the parameters are conditionally checked in e.g., v256_shr_n_byte. Adding a +// mask doesn't always appear to be sufficient. 
+#pragma warning(disable : 4556) +#endif + +using libaom_test::ACMRandom; + +namespace SIMD_NAMESPACE { + +// Wrap templates around intrinsics using immediate values +template <int shift> +v64 imm_v64_shl_n_byte(v64 a) { + return v64_shl_n_byte(a, shift); +} +template <int shift> +v64 imm_v64_shr_n_byte(v64 a) { + return v64_shr_n_byte(a, shift); +} +template <int shift> +v64 imm_v64_shl_n_8(v64 a) { + return v64_shl_n_8(a, shift); +} +template <int shift> +v64 imm_v64_shr_n_u8(v64 a) { + return v64_shr_n_u8(a, shift); +} +template <int shift> +v64 imm_v64_shr_n_s8(v64 a) { + return v64_shr_n_s8(a, shift); +} +template <int shift> +v64 imm_v64_shl_n_16(v64 a) { + return v64_shl_n_16(a, shift); +} +template <int shift> +v64 imm_v64_shr_n_u16(v64 a) { + return v64_shr_n_u16(a, shift); +} +template <int shift> +v64 imm_v64_shr_n_s16(v64 a) { + return v64_shr_n_s16(a, shift); +} +template <int shift> +v64 imm_v64_shl_n_32(v64 a) { + return v64_shl_n_32(a, shift); +} +template <int shift> +v64 imm_v64_shr_n_u32(v64 a) { + return v64_shr_n_u32(a, shift); +} +template <int shift> +v64 imm_v64_shr_n_s32(v64 a) { + return v64_shr_n_s32(a, shift); +} +template <int shift> +v64 imm_v64_align(v64 a, v64 b) { + return v64_align(a, b, shift); +} + +// Wrap templates around corresponding C implementations of the above +template <int shift> +c_v64 c_imm_v64_shl_n_byte(c_v64 a) { + return c_v64_shl_n_byte(a, shift); +} +template <int shift> +c_v64 c_imm_v64_shr_n_byte(c_v64 a) { + return c_v64_shr_n_byte(a, shift); +} +template <int shift> +c_v64 c_imm_v64_shl_n_8(c_v64 a) { + return c_v64_shl_n_8(a, shift); +} +template <int shift> +c_v64 c_imm_v64_shr_n_u8(c_v64 a) { + return c_v64_shr_n_u8(a, shift); +} +template <int shift> +c_v64 c_imm_v64_shr_n_s8(c_v64 a) { + return c_v64_shr_n_s8(a, shift); +} +template <int shift> +c_v64 c_imm_v64_shl_n_16(c_v64 a) { + return c_v64_shl_n_16(a, shift); +} +template <int shift> +c_v64 c_imm_v64_shr_n_u16(c_v64 a) { + return 
c_v64_shr_n_u16(a, shift); +} +template <int shift> +c_v64 c_imm_v64_shr_n_s16(c_v64 a) { + return c_v64_shr_n_s16(a, shift); +} +template <int shift> +c_v64 c_imm_v64_shl_n_32(c_v64 a) { + return c_v64_shl_n_32(a, shift); +} +template <int shift> +c_v64 c_imm_v64_shr_n_u32(c_v64 a) { + return c_v64_shr_n_u32(a, shift); +} +template <int shift> +c_v64 c_imm_v64_shr_n_s32(c_v64 a) { + return c_v64_shr_n_s32(a, shift); +} +template <int shift> +c_v64 c_imm_v64_align(c_v64 a, c_v64 b) { + return c_v64_align(a, b, shift); +} + +template <int shift> +v128 imm_v128_shl_n_byte(v128 a) { + return v128_shl_n_byte(a, shift); +} +template <int shift> +v128 imm_v128_shr_n_byte(v128 a) { + return v128_shr_n_byte(a, shift); +} +template <int shift> +v128 imm_v128_shl_n_8(v128 a) { + return v128_shl_n_8(a, shift); +} +template <int shift> +v128 imm_v128_shr_n_u8(v128 a) { + return v128_shr_n_u8(a, shift); +} +template <int shift> +v128 imm_v128_shr_n_s8(v128 a) { + return v128_shr_n_s8(a, shift); +} +template <int shift> +v128 imm_v128_shl_n_16(v128 a) { + return v128_shl_n_16(a, shift); +} +template <int shift> +v128 imm_v128_shr_n_u16(v128 a) { + return v128_shr_n_u16(a, shift); +} +template <int shift> +v128 imm_v128_shr_n_s16(v128 a) { + return v128_shr_n_s16(a, shift); +} +template <int shift> +v128 imm_v128_shl_n_32(v128 a) { + return v128_shl_n_32(a, shift); +} +template <int shift> +v128 imm_v128_shr_n_u32(v128 a) { + return v128_shr_n_u32(a, shift); +} +template <int shift> +v128 imm_v128_shr_n_s32(v128 a) { + return v128_shr_n_s32(a, shift); +} +template <int shift> +v128 imm_v128_shl_n_64(v128 a) { + return v128_shl_n_64(a, shift); +} +template <int shift> +v128 imm_v128_shr_n_u64(v128 a) { + return v128_shr_n_u64(a, shift); +} +template <int shift> +v128 imm_v128_shr_n_s64(v128 a) { + return v128_shr_n_s64(a, shift); +} +template <int shift> +v128 imm_v128_align(v128 a, v128 b) { + return v128_align(a, b, shift); +} + +template <int shift> +c_v128 
c_imm_v128_shl_n_byte(c_v128 a) { + return c_v128_shl_n_byte(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shr_n_byte(c_v128 a) { + return c_v128_shr_n_byte(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shl_n_8(c_v128 a) { + return c_v128_shl_n_8(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shr_n_u8(c_v128 a) { + return c_v128_shr_n_u8(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shr_n_s8(c_v128 a) { + return c_v128_shr_n_s8(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shl_n_16(c_v128 a) { + return c_v128_shl_n_16(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shr_n_u16(c_v128 a) { + return c_v128_shr_n_u16(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shr_n_s16(c_v128 a) { + return c_v128_shr_n_s16(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shl_n_32(c_v128 a) { + return c_v128_shl_n_32(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shr_n_u32(c_v128 a) { + return c_v128_shr_n_u32(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shr_n_s32(c_v128 a) { + return c_v128_shr_n_s32(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shl_n_64(c_v128 a) { + return c_v128_shl_n_64(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shr_n_u64(c_v128 a) { + return c_v128_shr_n_u64(a, shift); +} +template <int shift> +c_v128 c_imm_v128_shr_n_s64(c_v128 a) { + return c_v128_shr_n_s64(a, shift); +} +template <int shift> +c_v128 c_imm_v128_align(c_v128 a, c_v128 b) { + return c_v128_align(a, b, shift); +} + +template <int shift> +v256 imm_v256_shl_n_word(v256 a) { + return v256_shl_n_word(a, shift); +} +template <int shift> +v256 imm_v256_shr_n_word(v256 a) { + return v256_shr_n_word(a, shift); +} +template <int shift> +v256 imm_v256_shl_n_byte(v256 a) { + return v256_shl_n_byte(a, shift); +} +template <int shift> +v256 imm_v256_shr_n_byte(v256 a) { + return v256_shr_n_byte(a, shift); +} +template <int shift> +v256 imm_v256_shl_n_8(v256 a) { + return v256_shl_n_8(a, shift); +} 
+template <int shift> +v256 imm_v256_shr_n_u8(v256 a) { + return v256_shr_n_u8(a, shift); +} +template <int shift> +v256 imm_v256_shr_n_s8(v256 a) { + return v256_shr_n_s8(a, shift); +} +template <int shift> +v256 imm_v256_shl_n_16(v256 a) { + return v256_shl_n_16(a, shift); +} +template <int shift> +v256 imm_v256_shr_n_u16(v256 a) { + return v256_shr_n_u16(a, shift); +} +template <int shift> +v256 imm_v256_shr_n_s16(v256 a) { + return v256_shr_n_s16(a, shift); +} +template <int shift> +v256 imm_v256_shl_n_32(v256 a) { + return v256_shl_n_32(a, shift); +} +template <int shift> +v256 imm_v256_shr_n_u32(v256 a) { + return v256_shr_n_u32(a, shift); +} +template <int shift> +v256 imm_v256_shr_n_s32(v256 a) { + return v256_shr_n_s32(a, shift); +} +template <int shift> +v256 imm_v256_shl_n_64(v256 a) { + return v256_shl_n_64(a, shift); +} +template <int shift> +v256 imm_v256_shr_n_u64(v256 a) { + return v256_shr_n_u64(a, shift); +} +template <int shift> +v256 imm_v256_shr_n_s64(v256 a) { + return v256_shr_n_s64(a, shift); +} +template <int shift> +v256 imm_v256_align(v256 a, v256 b) { + return v256_align(a, b, shift); +} + +template <int shift> +c_v256 c_imm_v256_shl_n_word(c_v256 a) { + return c_v256_shl_n_word(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shr_n_word(c_v256 a) { + return c_v256_shr_n_word(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shl_n_byte(c_v256 a) { + return c_v256_shl_n_byte(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shr_n_byte(c_v256 a) { + return c_v256_shr_n_byte(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shl_n_8(c_v256 a) { + return c_v256_shl_n_8(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shr_n_u8(c_v256 a) { + return c_v256_shr_n_u8(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shr_n_s8(c_v256 a) { + return c_v256_shr_n_s8(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shl_n_16(c_v256 a) { + return c_v256_shl_n_16(a, shift); +} +template <int shift> +c_v256 
c_imm_v256_shr_n_u16(c_v256 a) { + return c_v256_shr_n_u16(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shr_n_s16(c_v256 a) { + return c_v256_shr_n_s16(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shl_n_32(c_v256 a) { + return c_v256_shl_n_32(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shr_n_u32(c_v256 a) { + return c_v256_shr_n_u32(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shr_n_s32(c_v256 a) { + return c_v256_shr_n_s32(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shl_n_64(c_v256 a) { + return c_v256_shl_n_64(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shr_n_u64(c_v256 a) { + return c_v256_shr_n_u64(a, shift); +} +template <int shift> +c_v256 c_imm_v256_shr_n_s64(c_v256 a) { + return c_v256_shr_n_s64(a, shift); +} +template <int shift> +c_v256 c_imm_v256_align(c_v256 a, c_v256 b) { + return c_v256_align(a, b, shift); +} + +// Wrappers around the SAD and SSD functions +uint32_t v64_sad_u8(v64 a, v64 b) { + return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b)); +} +uint32_t v64_ssd_u8(v64 a, v64 b) { + return v64_ssd_u8_sum(::v64_ssd_u8(v64_ssd_u8_init(), a, b)); +} + +uint32_t c_v64_sad_u8(c_v64 a, c_v64 b) { + return c_v64_sad_u8_sum(::c_v64_sad_u8(c_v64_sad_u8_init(), a, b)); +} +uint32_t c_v64_ssd_u8(c_v64 a, c_v64 b) { + return c_v64_ssd_u8_sum(::c_v64_ssd_u8(c_v64_ssd_u8_init(), a, b)); +} +uint32_t v128_sad_u8(v128 a, v128 b) { + return v128_sad_u8_sum(::v128_sad_u8(v128_sad_u8_init(), a, b)); +} +uint32_t v128_ssd_u8(v128 a, v128 b) { + return v128_ssd_u8_sum(::v128_ssd_u8(v128_ssd_u8_init(), a, b)); +} +uint32_t c_v128_sad_u8(c_v128 a, c_v128 b) { + return c_v128_sad_u8_sum(::c_v128_sad_u8(c_v128_sad_u8_init(), a, b)); +} +uint32_t c_v128_ssd_u8(c_v128 a, c_v128 b) { + return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), a, b)); +} +uint32_t v128_sad_u16(v128 a, v128 b) { + return v128_sad_u16_sum(::v128_sad_u16(v128_sad_u16_init(), a, b)); +} +uint64_t v128_ssd_s16(v128 
a, v128 b) { + return v128_ssd_s16_sum(::v128_ssd_s16(v128_ssd_s16_init(), a, b)); +} +uint32_t c_v128_sad_u16(c_v128 a, c_v128 b) { + return c_v128_sad_u16_sum(::c_v128_sad_u16(c_v128_sad_u16_init(), a, b)); +} +uint64_t c_v128_ssd_s16(c_v128 a, c_v128 b) { + return c_v128_ssd_s16_sum(::c_v128_ssd_s16(c_v128_ssd_s16_init(), a, b)); +} +uint32_t v256_sad_u8(v256 a, v256 b) { + return v256_sad_u8_sum(::v256_sad_u8(v256_sad_u8_init(), a, b)); +} +uint32_t v256_ssd_u8(v256 a, v256 b) { + return v256_ssd_u8_sum(::v256_ssd_u8(v256_ssd_u8_init(), a, b)); +} +uint32_t c_v256_sad_u8(c_v256 a, c_v256 b) { + return c_v256_sad_u8_sum(::c_v256_sad_u8(c_v256_sad_u8_init(), a, b)); +} +uint32_t c_v256_ssd_u8(c_v256 a, c_v256 b) { + return c_v256_ssd_u8_sum(::c_v256_ssd_u8(c_v256_ssd_u8_init(), a, b)); +} +uint32_t v256_sad_u16(v256 a, v256 b) { + return v256_sad_u16_sum(::v256_sad_u16(v256_sad_u16_init(), a, b)); +} +uint64_t v256_ssd_s16(v256 a, v256 b) { + return v256_ssd_s16_sum(::v256_ssd_s16(v256_ssd_s16_init(), a, b)); +} +uint32_t c_v256_sad_u16(c_v256 a, c_v256 b) { + return c_v256_sad_u16_sum(::c_v256_sad_u16(c_v256_sad_u16_init(), a, b)); +} +uint64_t c_v256_ssd_s16(c_v256 a, c_v256 b) { + return c_v256_ssd_s16_sum(::c_v256_ssd_s16(c_v256_ssd_s16_init(), a, b)); +} + +namespace { + +typedef void (*fptr)(); + +typedef struct { + const char *name; + fptr ref; + fptr simd; +} mapping; + +#define MAP(name) \ + { #name, reinterpret_cast < fptr>(c_##name), reinterpret_cast < fptr>(name) } + +const mapping m[] = { MAP(v64_sad_u8), + MAP(v64_ssd_u8), + MAP(v64_add_8), + MAP(v64_add_16), + MAP(v64_sadd_s8), + MAP(v64_sadd_u8), + MAP(v64_sadd_s16), + MAP(v64_add_32), + MAP(v64_sub_8), + MAP(v64_ssub_u8), + MAP(v64_ssub_s8), + MAP(v64_sub_16), + MAP(v64_ssub_s16), + MAP(v64_ssub_u16), + MAP(v64_sub_32), + MAP(v64_ziplo_8), + MAP(v64_ziphi_8), + MAP(v64_ziplo_16), + MAP(v64_ziphi_16), + MAP(v64_ziplo_32), + MAP(v64_ziphi_32), + MAP(v64_pack_s32_u16), + MAP(v64_pack_s32_s16), + 
MAP(v64_pack_s16_u8), + MAP(v64_pack_s16_s8), + MAP(v64_unziphi_8), + MAP(v64_unziplo_8), + MAP(v64_unziphi_16), + MAP(v64_unziplo_16), + MAP(v64_or), + MAP(v64_xor), + MAP(v64_and), + MAP(v64_andn), + MAP(v64_mullo_s16), + MAP(v64_mulhi_s16), + MAP(v64_mullo_s32), + MAP(v64_madd_s16), + MAP(v64_madd_us8), + MAP(v64_avg_u8), + MAP(v64_rdavg_u8), + MAP(v64_rdavg_u16), + MAP(v64_avg_u16), + MAP(v64_min_u8), + MAP(v64_max_u8), + MAP(v64_min_s8), + MAP(v64_max_s8), + MAP(v64_min_s16), + MAP(v64_max_s16), + MAP(v64_cmpgt_s8), + MAP(v64_cmplt_s8), + MAP(v64_cmpeq_8), + MAP(v64_cmpgt_s16), + MAP(v64_cmplt_s16), + MAP(v64_cmpeq_16), + MAP(v64_shuffle_8), + MAP(imm_v64_align<1>), + MAP(imm_v64_align<2>), + MAP(imm_v64_align<3>), + MAP(imm_v64_align<4>), + MAP(imm_v64_align<5>), + MAP(imm_v64_align<6>), + MAP(imm_v64_align<7>), + MAP(v64_abs_s8), + MAP(v64_abs_s16), + MAP(v64_unpacklo_u8_s16), + MAP(v64_unpackhi_u8_s16), + MAP(v64_unpacklo_s8_s16), + MAP(v64_unpackhi_s8_s16), + MAP(v64_unpacklo_u16_s32), + MAP(v64_unpacklo_s16_s32), + MAP(v64_unpackhi_u16_s32), + MAP(v64_unpackhi_s16_s32), + MAP(imm_v64_shr_n_byte<1>), + MAP(imm_v64_shr_n_byte<2>), + MAP(imm_v64_shr_n_byte<3>), + MAP(imm_v64_shr_n_byte<4>), + MAP(imm_v64_shr_n_byte<5>), + MAP(imm_v64_shr_n_byte<6>), + MAP(imm_v64_shr_n_byte<7>), + MAP(imm_v64_shl_n_byte<1>), + MAP(imm_v64_shl_n_byte<2>), + MAP(imm_v64_shl_n_byte<3>), + MAP(imm_v64_shl_n_byte<4>), + MAP(imm_v64_shl_n_byte<5>), + MAP(imm_v64_shl_n_byte<6>), + MAP(imm_v64_shl_n_byte<7>), + MAP(imm_v64_shl_n_8<1>), + MAP(imm_v64_shl_n_8<2>), + MAP(imm_v64_shl_n_8<3>), + MAP(imm_v64_shl_n_8<4>), + MAP(imm_v64_shl_n_8<5>), + MAP(imm_v64_shl_n_8<6>), + MAP(imm_v64_shl_n_8<7>), + MAP(imm_v64_shr_n_u8<1>), + MAP(imm_v64_shr_n_u8<2>), + MAP(imm_v64_shr_n_u8<3>), + MAP(imm_v64_shr_n_u8<4>), + MAP(imm_v64_shr_n_u8<5>), + MAP(imm_v64_shr_n_u8<6>), + MAP(imm_v64_shr_n_u8<7>), + MAP(imm_v64_shr_n_s8<1>), + MAP(imm_v64_shr_n_s8<2>), + MAP(imm_v64_shr_n_s8<3>), + 
MAP(imm_v64_shr_n_s8<4>), + MAP(imm_v64_shr_n_s8<5>), + MAP(imm_v64_shr_n_s8<6>), + MAP(imm_v64_shr_n_s8<7>), + MAP(imm_v64_shl_n_16<1>), + MAP(imm_v64_shl_n_16<2>), + MAP(imm_v64_shl_n_16<4>), + MAP(imm_v64_shl_n_16<6>), + MAP(imm_v64_shl_n_16<8>), + MAP(imm_v64_shl_n_16<10>), + MAP(imm_v64_shl_n_16<12>), + MAP(imm_v64_shl_n_16<14>), + MAP(imm_v64_shr_n_u16<1>), + MAP(imm_v64_shr_n_u16<2>), + MAP(imm_v64_shr_n_u16<4>), + MAP(imm_v64_shr_n_u16<6>), + MAP(imm_v64_shr_n_u16<8>), + MAP(imm_v64_shr_n_u16<10>), + MAP(imm_v64_shr_n_u16<12>), + MAP(imm_v64_shr_n_u16<14>), + MAP(imm_v64_shr_n_s16<1>), + MAP(imm_v64_shr_n_s16<2>), + MAP(imm_v64_shr_n_s16<4>), + MAP(imm_v64_shr_n_s16<6>), + MAP(imm_v64_shr_n_s16<8>), + MAP(imm_v64_shr_n_s16<10>), + MAP(imm_v64_shr_n_s16<12>), + MAP(imm_v64_shr_n_s16<14>), + MAP(imm_v64_shl_n_32<1>), + MAP(imm_v64_shl_n_32<4>), + MAP(imm_v64_shl_n_32<8>), + MAP(imm_v64_shl_n_32<12>), + MAP(imm_v64_shl_n_32<16>), + MAP(imm_v64_shl_n_32<20>), + MAP(imm_v64_shl_n_32<24>), + MAP(imm_v64_shl_n_32<28>), + MAP(imm_v64_shr_n_u32<1>), + MAP(imm_v64_shr_n_u32<4>), + MAP(imm_v64_shr_n_u32<8>), + MAP(imm_v64_shr_n_u32<12>), + MAP(imm_v64_shr_n_u32<16>), + MAP(imm_v64_shr_n_u32<20>), + MAP(imm_v64_shr_n_u32<24>), + MAP(imm_v64_shr_n_u32<28>), + MAP(imm_v64_shr_n_s32<1>), + MAP(imm_v64_shr_n_s32<4>), + MAP(imm_v64_shr_n_s32<8>), + MAP(imm_v64_shr_n_s32<12>), + MAP(imm_v64_shr_n_s32<16>), + MAP(imm_v64_shr_n_s32<20>), + MAP(imm_v64_shr_n_s32<24>), + MAP(imm_v64_shr_n_s32<28>), + MAP(v64_shl_8), + MAP(v64_shr_u8), + MAP(v64_shr_s8), + MAP(v64_shl_16), + MAP(v64_shr_u16), + MAP(v64_shr_s16), + MAP(v64_shl_32), + MAP(v64_shr_u32), + MAP(v64_shr_s32), + MAP(v64_hadd_u8), + MAP(v64_hadd_s16), + MAP(v64_dotp_s16), + MAP(v64_dotp_su8), + MAP(v64_u64), + MAP(v64_low_u32), + MAP(v64_high_u32), + MAP(v64_low_s32), + MAP(v64_high_s32), + MAP(v64_dup_8), + MAP(v64_dup_16), + MAP(v64_dup_32), + MAP(v64_from_32), + MAP(v64_zero), + MAP(v64_from_16), + MAP(v128_sad_u8), + 
MAP(v128_ssd_u8), + MAP(v128_sad_u16), + MAP(v128_ssd_s16), + MAP(v128_add_8), + MAP(v128_add_16), + MAP(v128_sadd_s8), + MAP(v128_sadd_u8), + MAP(v128_sadd_s16), + MAP(v128_add_32), + MAP(v128_add_64), + MAP(v128_sub_8), + MAP(v128_ssub_u8), + MAP(v128_ssub_s8), + MAP(v128_sub_16), + MAP(v128_ssub_s16), + MAP(v128_ssub_u16), + MAP(v128_sub_32), + MAP(v128_sub_64), + MAP(v128_ziplo_8), + MAP(v128_ziphi_8), + MAP(v128_ziplo_16), + MAP(v128_ziphi_16), + MAP(v128_ziplo_32), + MAP(v128_ziphi_32), + MAP(v128_ziplo_64), + MAP(v128_ziphi_64), + MAP(v128_unziphi_8), + MAP(v128_unziplo_8), + MAP(v128_unziphi_16), + MAP(v128_unziplo_16), + MAP(v128_unziphi_32), + MAP(v128_unziplo_32), + MAP(v128_pack_s32_u16), + MAP(v128_pack_s32_s16), + MAP(v128_pack_s16_u8), + MAP(v128_pack_s16_s8), + MAP(v128_or), + MAP(v128_xor), + MAP(v128_and), + MAP(v128_andn), + MAP(v128_mullo_s16), + MAP(v128_mulhi_s16), + MAP(v128_mullo_s32), + MAP(v128_madd_s16), + MAP(v128_madd_us8), + MAP(v128_avg_u8), + MAP(v128_rdavg_u8), + MAP(v128_rdavg_u16), + MAP(v128_avg_u16), + MAP(v128_min_u8), + MAP(v128_max_u8), + MAP(v128_min_s8), + MAP(v128_max_s8), + MAP(v128_min_s16), + MAP(v128_max_s16), + MAP(v128_min_s32), + MAP(v128_max_s32), + MAP(v128_cmpgt_s8), + MAP(v128_cmplt_s8), + MAP(v128_cmpeq_8), + MAP(v128_cmpgt_s16), + MAP(v128_cmpeq_16), + MAP(v128_cmplt_s16), + MAP(v128_cmpgt_s32), + MAP(v128_cmpeq_32), + MAP(v128_cmplt_s32), + MAP(v128_shuffle_8), + MAP(imm_v128_align<1>), + MAP(imm_v128_align<2>), + MAP(imm_v128_align<3>), + MAP(imm_v128_align<4>), + MAP(imm_v128_align<5>), + MAP(imm_v128_align<6>), + MAP(imm_v128_align<7>), + MAP(imm_v128_align<8>), + MAP(imm_v128_align<9>), + MAP(imm_v128_align<10>), + MAP(imm_v128_align<11>), + MAP(imm_v128_align<12>), + MAP(imm_v128_align<13>), + MAP(imm_v128_align<14>), + MAP(imm_v128_align<15>), + MAP(v128_abs_s8), + MAP(v128_abs_s16), + MAP(v128_padd_u8), + MAP(v128_padd_s16), + MAP(v128_unpacklo_u16_s32), + MAP(v128_unpacklo_s16_s32), + 
MAP(v128_unpackhi_u16_s32), + MAP(v128_unpackhi_s16_s32), + MAP(imm_v128_shr_n_byte<1>), + MAP(imm_v128_shr_n_byte<2>), + MAP(imm_v128_shr_n_byte<3>), + MAP(imm_v128_shr_n_byte<4>), + MAP(imm_v128_shr_n_byte<5>), + MAP(imm_v128_shr_n_byte<6>), + MAP(imm_v128_shr_n_byte<7>), + MAP(imm_v128_shr_n_byte<8>), + MAP(imm_v128_shr_n_byte<9>), + MAP(imm_v128_shr_n_byte<10>), + MAP(imm_v128_shr_n_byte<11>), + MAP(imm_v128_shr_n_byte<12>), + MAP(imm_v128_shr_n_byte<13>), + MAP(imm_v128_shr_n_byte<14>), + MAP(imm_v128_shr_n_byte<15>), + MAP(imm_v128_shl_n_byte<1>), + MAP(imm_v128_shl_n_byte<2>), + MAP(imm_v128_shl_n_byte<3>), + MAP(imm_v128_shl_n_byte<4>), + MAP(imm_v128_shl_n_byte<5>), + MAP(imm_v128_shl_n_byte<6>), + MAP(imm_v128_shl_n_byte<7>), + MAP(imm_v128_shl_n_byte<8>), + MAP(imm_v128_shl_n_byte<9>), + MAP(imm_v128_shl_n_byte<10>), + MAP(imm_v128_shl_n_byte<11>), + MAP(imm_v128_shl_n_byte<12>), + MAP(imm_v128_shl_n_byte<13>), + MAP(imm_v128_shl_n_byte<14>), + MAP(imm_v128_shl_n_byte<15>), + MAP(imm_v128_shl_n_8<1>), + MAP(imm_v128_shl_n_8<2>), + MAP(imm_v128_shl_n_8<3>), + MAP(imm_v128_shl_n_8<4>), + MAP(imm_v128_shl_n_8<5>), + MAP(imm_v128_shl_n_8<6>), + MAP(imm_v128_shl_n_8<7>), + MAP(imm_v128_shr_n_u8<1>), + MAP(imm_v128_shr_n_u8<2>), + MAP(imm_v128_shr_n_u8<3>), + MAP(imm_v128_shr_n_u8<4>), + MAP(imm_v128_shr_n_u8<5>), + MAP(imm_v128_shr_n_u8<6>), + MAP(imm_v128_shr_n_u8<7>), + MAP(imm_v128_shr_n_s8<1>), + MAP(imm_v128_shr_n_s8<2>), + MAP(imm_v128_shr_n_s8<3>), + MAP(imm_v128_shr_n_s8<4>), + MAP(imm_v128_shr_n_s8<5>), + MAP(imm_v128_shr_n_s8<6>), + MAP(imm_v128_shr_n_s8<7>), + MAP(imm_v128_shl_n_16<1>), + MAP(imm_v128_shl_n_16<2>), + MAP(imm_v128_shl_n_16<4>), + MAP(imm_v128_shl_n_16<6>), + MAP(imm_v128_shl_n_16<8>), + MAP(imm_v128_shl_n_16<10>), + MAP(imm_v128_shl_n_16<12>), + MAP(imm_v128_shl_n_16<14>), + MAP(imm_v128_shr_n_u16<1>), + MAP(imm_v128_shr_n_u16<2>), + MAP(imm_v128_shr_n_u16<4>), + MAP(imm_v128_shr_n_u16<6>), + MAP(imm_v128_shr_n_u16<8>), + 
MAP(imm_v128_shr_n_u16<10>), + MAP(imm_v128_shr_n_u16<12>), + MAP(imm_v128_shr_n_u16<14>), + MAP(imm_v128_shr_n_s16<1>), + MAP(imm_v128_shr_n_s16<2>), + MAP(imm_v128_shr_n_s16<4>), + MAP(imm_v128_shr_n_s16<6>), + MAP(imm_v128_shr_n_s16<8>), + MAP(imm_v128_shr_n_s16<10>), + MAP(imm_v128_shr_n_s16<12>), + MAP(imm_v128_shr_n_s16<14>), + MAP(imm_v128_shl_n_32<1>), + MAP(imm_v128_shl_n_32<4>), + MAP(imm_v128_shl_n_32<8>), + MAP(imm_v128_shl_n_32<12>), + MAP(imm_v128_shl_n_32<16>), + MAP(imm_v128_shl_n_32<20>), + MAP(imm_v128_shl_n_32<24>), + MAP(imm_v128_shl_n_32<28>), + MAP(imm_v128_shr_n_u32<1>), + MAP(imm_v128_shr_n_u32<4>), + MAP(imm_v128_shr_n_u32<8>), + MAP(imm_v128_shr_n_u32<12>), + MAP(imm_v128_shr_n_u32<16>), + MAP(imm_v128_shr_n_u32<20>), + MAP(imm_v128_shr_n_u32<24>), + MAP(imm_v128_shr_n_u32<28>), + MAP(imm_v128_shr_n_s32<1>), + MAP(imm_v128_shr_n_s32<4>), + MAP(imm_v128_shr_n_s32<8>), + MAP(imm_v128_shr_n_s32<12>), + MAP(imm_v128_shr_n_s32<16>), + MAP(imm_v128_shr_n_s32<20>), + MAP(imm_v128_shr_n_s32<24>), + MAP(imm_v128_shr_n_s32<28>), + MAP(imm_v128_shl_n_64<1>), + MAP(imm_v128_shl_n_64<4>), + MAP(imm_v128_shl_n_64<8>), + MAP(imm_v128_shl_n_64<12>), + MAP(imm_v128_shl_n_64<16>), + MAP(imm_v128_shl_n_64<20>), + MAP(imm_v128_shl_n_64<24>), + MAP(imm_v128_shl_n_64<28>), + MAP(imm_v128_shl_n_64<32>), + MAP(imm_v128_shl_n_64<36>), + MAP(imm_v128_shl_n_64<40>), + MAP(imm_v128_shl_n_64<44>), + MAP(imm_v128_shl_n_64<48>), + MAP(imm_v128_shl_n_64<52>), + MAP(imm_v128_shl_n_64<56>), + MAP(imm_v128_shl_n_64<60>), + MAP(imm_v128_shr_n_u64<1>), + MAP(imm_v128_shr_n_u64<4>), + MAP(imm_v128_shr_n_u64<8>), + MAP(imm_v128_shr_n_u64<12>), + MAP(imm_v128_shr_n_u64<16>), + MAP(imm_v128_shr_n_u64<20>), + MAP(imm_v128_shr_n_u64<24>), + MAP(imm_v128_shr_n_u64<28>), + MAP(imm_v128_shr_n_u64<32>), + MAP(imm_v128_shr_n_u64<36>), + MAP(imm_v128_shr_n_u64<40>), + MAP(imm_v128_shr_n_u64<44>), + MAP(imm_v128_shr_n_u64<48>), + MAP(imm_v128_shr_n_u64<52>), + MAP(imm_v128_shr_n_u64<56>), 
+ MAP(imm_v128_shr_n_u64<60>), + MAP(imm_v128_shr_n_s64<1>), + MAP(imm_v128_shr_n_s64<4>), + MAP(imm_v128_shr_n_s64<8>), + MAP(imm_v128_shr_n_s64<12>), + MAP(imm_v128_shr_n_s64<16>), + MAP(imm_v128_shr_n_s64<20>), + MAP(imm_v128_shr_n_s64<24>), + MAP(imm_v128_shr_n_s64<28>), + MAP(imm_v128_shr_n_s64<32>), + MAP(imm_v128_shr_n_s64<36>), + MAP(imm_v128_shr_n_s64<40>), + MAP(imm_v128_shr_n_s64<44>), + MAP(imm_v128_shr_n_s64<48>), + MAP(imm_v128_shr_n_s64<52>), + MAP(imm_v128_shr_n_s64<56>), + MAP(imm_v128_shr_n_s64<60>), + MAP(v128_from_v64), + MAP(v128_zip_8), + MAP(v128_zip_16), + MAP(v128_zip_32), + MAP(v128_mul_s16), + MAP(v128_unpack_u8_s16), + MAP(v128_unpack_s8_s16), + MAP(v128_unpack_u16_s32), + MAP(v128_unpack_s16_s32), + MAP(v128_shl_8), + MAP(v128_shr_u8), + MAP(v128_shr_s8), + MAP(v128_shl_16), + MAP(v128_shr_u16), + MAP(v128_shr_s16), + MAP(v128_shl_32), + MAP(v128_shr_u32), + MAP(v128_shr_s32), + MAP(v128_shl_64), + MAP(v128_shr_u64), + MAP(v128_shr_s64), + MAP(v128_hadd_u8), + MAP(v128_dotp_su8), + MAP(v128_dotp_s16), + MAP(v128_dotp_s32), + MAP(v128_low_u32), + MAP(v128_low_v64), + MAP(v128_high_v64), + MAP(v128_from_64), + MAP(v128_from_32), + MAP(v128_movemask_8), + MAP(v128_zero), + MAP(v128_dup_8), + MAP(v128_dup_16), + MAP(v128_dup_32), + MAP(v128_dup_64), + MAP(v128_unpacklo_u8_s16), + MAP(v128_unpackhi_u8_s16), + MAP(v128_unpacklo_s8_s16), + MAP(v128_unpackhi_s8_s16), + MAP(v128_blend_8), + MAP(u32_load_unaligned), + MAP(u32_store_unaligned), + MAP(v64_load_unaligned), + MAP(v64_store_unaligned), + MAP(v128_load_unaligned), + MAP(v128_store_unaligned), + MAP(v256_sad_u8), + MAP(v256_ssd_u8), + MAP(v256_sad_u16), + MAP(v256_ssd_s16), + MAP(v256_hadd_u8), + MAP(v256_low_u64), + MAP(v256_dotp_su8), + MAP(v256_dotp_s16), + MAP(v256_dotp_s32), + MAP(v256_add_8), + MAP(v256_add_16), + MAP(v256_sadd_s8), + MAP(v256_sadd_u8), + MAP(v256_sadd_s16), + MAP(v256_add_32), + MAP(v256_add_64), + MAP(v256_sub_8), + MAP(v256_ssub_u8), + MAP(v256_ssub_s8), + 
MAP(v256_sub_16), + MAP(v256_ssub_u16), + MAP(v256_ssub_s16), + MAP(v256_sub_32), + MAP(v256_sub_64), + MAP(v256_ziplo_8), + MAP(v256_ziphi_8), + MAP(v256_ziplo_16), + MAP(v256_ziphi_16), + MAP(v256_ziplo_32), + MAP(v256_ziphi_32), + MAP(v256_ziplo_64), + MAP(v256_ziphi_64), + MAP(v256_unziphi_8), + MAP(v256_unziplo_8), + MAP(v256_unziphi_16), + MAP(v256_unziplo_16), + MAP(v256_unziphi_32), + MAP(v256_unziplo_32), + MAP(v256_unziphi_64), + MAP(v256_unziplo_64), + MAP(v256_pack_s32_u16), + MAP(v256_pack_s32_s16), + MAP(v256_pack_s16_u8), + MAP(v256_pack_s16_s8), + MAP(v256_or), + MAP(v256_xor), + MAP(v256_and), + MAP(v256_andn), + MAP(v256_mullo_s16), + MAP(v256_mulhi_s16), + MAP(v256_mullo_s32), + MAP(v256_madd_s16), + MAP(v256_madd_us8), + MAP(v256_avg_u8), + MAP(v256_rdavg_u8), + MAP(v256_rdavg_u16), + MAP(v256_avg_u16), + MAP(v256_min_u8), + MAP(v256_max_u8), + MAP(v256_min_s8), + MAP(v256_max_s8), + MAP(v256_min_s16), + MAP(v256_max_s16), + MAP(v256_min_s32), + MAP(v256_max_s32), + MAP(v256_cmpgt_s8), + MAP(v256_cmplt_s8), + MAP(v256_cmpeq_8), + MAP(v256_cmpgt_s16), + MAP(v256_cmplt_s16), + MAP(v256_cmpeq_16), + MAP(v256_cmpgt_s32), + MAP(v256_cmplt_s32), + MAP(v256_cmpeq_32), + MAP(v256_shuffle_8), + MAP(v256_pshuffle_8), + MAP(v256_wideshuffle_8), + MAP(imm_v256_align<1>), + MAP(imm_v256_align<2>), + MAP(imm_v256_align<3>), + MAP(imm_v256_align<4>), + MAP(imm_v256_align<5>), + MAP(imm_v256_align<6>), + MAP(imm_v256_align<7>), + MAP(imm_v256_align<8>), + MAP(imm_v256_align<9>), + MAP(imm_v256_align<10>), + MAP(imm_v256_align<11>), + MAP(imm_v256_align<12>), + MAP(imm_v256_align<13>), + MAP(imm_v256_align<14>), + MAP(imm_v256_align<15>), + MAP(imm_v256_align<16>), + MAP(imm_v256_align<17>), + MAP(imm_v256_align<18>), + MAP(imm_v256_align<19>), + MAP(imm_v256_align<20>), + MAP(imm_v256_align<21>), + MAP(imm_v256_align<22>), + MAP(imm_v256_align<23>), + MAP(imm_v256_align<24>), + MAP(imm_v256_align<25>), + MAP(imm_v256_align<26>), + MAP(imm_v256_align<27>), + 
MAP(imm_v256_align<28>), + MAP(imm_v256_align<29>), + MAP(imm_v256_align<30>), + MAP(imm_v256_align<31>), + MAP(v256_from_v128), + MAP(v256_zip_8), + MAP(v256_zip_16), + MAP(v256_zip_32), + MAP(v256_mul_s16), + MAP(v256_unpack_u8_s16), + MAP(v256_unpack_s8_s16), + MAP(v256_unpack_u16_s32), + MAP(v256_unpack_s16_s32), + MAP(v256_shl_8), + MAP(v256_shr_u8), + MAP(v256_shr_s8), + MAP(v256_shl_16), + MAP(v256_shr_u16), + MAP(v256_shr_s16), + MAP(v256_shl_32), + MAP(v256_shr_u32), + MAP(v256_shr_s32), + MAP(v256_shl_64), + MAP(v256_shr_u64), + MAP(v256_shr_s64), + MAP(v256_abs_s8), + MAP(v256_abs_s16), + MAP(v256_padd_u8), + MAP(v256_padd_s16), + MAP(v256_unpacklo_u16_s32), + MAP(v256_unpacklo_s16_s32), + MAP(v256_unpackhi_u16_s32), + MAP(v256_unpackhi_s16_s32), + MAP(imm_v256_shr_n_word<1>), + MAP(imm_v256_shr_n_word<2>), + MAP(imm_v256_shr_n_word<3>), + MAP(imm_v256_shr_n_word<4>), + MAP(imm_v256_shr_n_word<5>), + MAP(imm_v256_shr_n_word<6>), + MAP(imm_v256_shr_n_word<7>), + MAP(imm_v256_shr_n_word<8>), + MAP(imm_v256_shr_n_word<9>), + MAP(imm_v256_shr_n_word<10>), + MAP(imm_v256_shr_n_word<11>), + MAP(imm_v256_shr_n_word<12>), + MAP(imm_v256_shr_n_word<13>), + MAP(imm_v256_shr_n_word<14>), + MAP(imm_v256_shr_n_word<15>), + MAP(imm_v256_shl_n_word<1>), + MAP(imm_v256_shl_n_word<2>), + MAP(imm_v256_shl_n_word<3>), + MAP(imm_v256_shl_n_word<4>), + MAP(imm_v256_shl_n_word<5>), + MAP(imm_v256_shl_n_word<6>), + MAP(imm_v256_shl_n_word<7>), + MAP(imm_v256_shl_n_word<8>), + MAP(imm_v256_shl_n_word<9>), + MAP(imm_v256_shl_n_word<10>), + MAP(imm_v256_shl_n_word<11>), + MAP(imm_v256_shl_n_word<12>), + MAP(imm_v256_shl_n_word<13>), + MAP(imm_v256_shl_n_word<14>), + MAP(imm_v256_shl_n_word<15>), + MAP(imm_v256_shr_n_byte<1>), + MAP(imm_v256_shr_n_byte<2>), + MAP(imm_v256_shr_n_byte<3>), + MAP(imm_v256_shr_n_byte<4>), + MAP(imm_v256_shr_n_byte<5>), + MAP(imm_v256_shr_n_byte<6>), + MAP(imm_v256_shr_n_byte<7>), + MAP(imm_v256_shr_n_byte<8>), + MAP(imm_v256_shr_n_byte<9>), + 
MAP(imm_v256_shr_n_byte<10>), + MAP(imm_v256_shr_n_byte<11>), + MAP(imm_v256_shr_n_byte<12>), + MAP(imm_v256_shr_n_byte<13>), + MAP(imm_v256_shr_n_byte<14>), + MAP(imm_v256_shr_n_byte<15>), + MAP(imm_v256_shr_n_byte<16>), + MAP(imm_v256_shr_n_byte<17>), + MAP(imm_v256_shr_n_byte<18>), + MAP(imm_v256_shr_n_byte<19>), + MAP(imm_v256_shr_n_byte<20>), + MAP(imm_v256_shr_n_byte<21>), + MAP(imm_v256_shr_n_byte<22>), + MAP(imm_v256_shr_n_byte<23>), + MAP(imm_v256_shr_n_byte<24>), + MAP(imm_v256_shr_n_byte<25>), + MAP(imm_v256_shr_n_byte<26>), + MAP(imm_v256_shr_n_byte<27>), + MAP(imm_v256_shr_n_byte<28>), + MAP(imm_v256_shr_n_byte<29>), + MAP(imm_v256_shr_n_byte<30>), + MAP(imm_v256_shr_n_byte<31>), + MAP(imm_v256_shl_n_byte<1>), + MAP(imm_v256_shl_n_byte<2>), + MAP(imm_v256_shl_n_byte<3>), + MAP(imm_v256_shl_n_byte<4>), + MAP(imm_v256_shl_n_byte<5>), + MAP(imm_v256_shl_n_byte<6>), + MAP(imm_v256_shl_n_byte<7>), + MAP(imm_v256_shl_n_byte<8>), + MAP(imm_v256_shl_n_byte<9>), + MAP(imm_v256_shl_n_byte<10>), + MAP(imm_v256_shl_n_byte<11>), + MAP(imm_v256_shl_n_byte<12>), + MAP(imm_v256_shl_n_byte<13>), + MAP(imm_v256_shl_n_byte<14>), + MAP(imm_v256_shl_n_byte<15>), + MAP(imm_v256_shl_n_byte<16>), + MAP(imm_v256_shl_n_byte<17>), + MAP(imm_v256_shl_n_byte<18>), + MAP(imm_v256_shl_n_byte<19>), + MAP(imm_v256_shl_n_byte<20>), + MAP(imm_v256_shl_n_byte<21>), + MAP(imm_v256_shl_n_byte<22>), + MAP(imm_v256_shl_n_byte<23>), + MAP(imm_v256_shl_n_byte<24>), + MAP(imm_v256_shl_n_byte<25>), + MAP(imm_v256_shl_n_byte<26>), + MAP(imm_v256_shl_n_byte<27>), + MAP(imm_v256_shl_n_byte<28>), + MAP(imm_v256_shl_n_byte<29>), + MAP(imm_v256_shl_n_byte<30>), + MAP(imm_v256_shl_n_byte<31>), + MAP(imm_v256_shl_n_8<1>), + MAP(imm_v256_shl_n_8<2>), + MAP(imm_v256_shl_n_8<3>), + MAP(imm_v256_shl_n_8<4>), + MAP(imm_v256_shl_n_8<5>), + MAP(imm_v256_shl_n_8<6>), + MAP(imm_v256_shl_n_8<7>), + MAP(imm_v256_shr_n_u8<1>), + MAP(imm_v256_shr_n_u8<2>), + MAP(imm_v256_shr_n_u8<3>), + MAP(imm_v256_shr_n_u8<4>), + 
MAP(imm_v256_shr_n_u8<5>), + MAP(imm_v256_shr_n_u8<6>), + MAP(imm_v256_shr_n_u8<7>), + MAP(imm_v256_shr_n_s8<1>), + MAP(imm_v256_shr_n_s8<2>), + MAP(imm_v256_shr_n_s8<3>), + MAP(imm_v256_shr_n_s8<4>), + MAP(imm_v256_shr_n_s8<5>), + MAP(imm_v256_shr_n_s8<6>), + MAP(imm_v256_shr_n_s8<7>), + MAP(imm_v256_shl_n_16<1>), + MAP(imm_v256_shl_n_16<2>), + MAP(imm_v256_shl_n_16<4>), + MAP(imm_v256_shl_n_16<6>), + MAP(imm_v256_shl_n_16<8>), + MAP(imm_v256_shl_n_16<10>), + MAP(imm_v256_shl_n_16<12>), + MAP(imm_v256_shl_n_16<14>), + MAP(imm_v256_shr_n_u16<1>), + MAP(imm_v256_shr_n_u16<2>), + MAP(imm_v256_shr_n_u16<4>), + MAP(imm_v256_shr_n_u16<6>), + MAP(imm_v256_shr_n_u16<8>), + MAP(imm_v256_shr_n_u16<10>), + MAP(imm_v256_shr_n_u16<12>), + MAP(imm_v256_shr_n_u16<14>), + MAP(imm_v256_shr_n_s16<1>), + MAP(imm_v256_shr_n_s16<2>), + MAP(imm_v256_shr_n_s16<4>), + MAP(imm_v256_shr_n_s16<6>), + MAP(imm_v256_shr_n_s16<8>), + MAP(imm_v256_shr_n_s16<10>), + MAP(imm_v256_shr_n_s16<12>), + MAP(imm_v256_shr_n_s16<14>), + MAP(imm_v256_shl_n_32<1>), + MAP(imm_v256_shl_n_32<4>), + MAP(imm_v256_shl_n_32<8>), + MAP(imm_v256_shl_n_32<12>), + MAP(imm_v256_shl_n_32<16>), + MAP(imm_v256_shl_n_32<20>), + MAP(imm_v256_shl_n_32<24>), + MAP(imm_v256_shl_n_32<28>), + MAP(imm_v256_shr_n_u32<1>), + MAP(imm_v256_shr_n_u32<4>), + MAP(imm_v256_shr_n_u32<8>), + MAP(imm_v256_shr_n_u32<12>), + MAP(imm_v256_shr_n_u32<16>), + MAP(imm_v256_shr_n_u32<20>), + MAP(imm_v256_shr_n_u32<24>), + MAP(imm_v256_shr_n_u32<28>), + MAP(imm_v256_shr_n_s32<1>), + MAP(imm_v256_shr_n_s32<4>), + MAP(imm_v256_shr_n_s32<8>), + MAP(imm_v256_shr_n_s32<12>), + MAP(imm_v256_shr_n_s32<16>), + MAP(imm_v256_shr_n_s32<20>), + MAP(imm_v256_shr_n_s32<24>), + MAP(imm_v256_shr_n_s32<28>), + MAP(imm_v256_shl_n_64<1>), + MAP(imm_v256_shl_n_64<4>), + MAP(imm_v256_shl_n_64<8>), + MAP(imm_v256_shl_n_64<12>), + MAP(imm_v256_shl_n_64<16>), + MAP(imm_v256_shl_n_64<20>), + MAP(imm_v256_shl_n_64<24>), + MAP(imm_v256_shl_n_64<28>), + 
MAP(imm_v256_shl_n_64<32>), + MAP(imm_v256_shl_n_64<36>), + MAP(imm_v256_shl_n_64<40>), + MAP(imm_v256_shl_n_64<44>), + MAP(imm_v256_shl_n_64<48>), + MAP(imm_v256_shl_n_64<52>), + MAP(imm_v256_shl_n_64<56>), + MAP(imm_v256_shl_n_64<60>), + MAP(imm_v256_shr_n_u64<1>), + MAP(imm_v256_shr_n_u64<4>), + MAP(imm_v256_shr_n_u64<8>), + MAP(imm_v256_shr_n_u64<12>), + MAP(imm_v256_shr_n_u64<16>), + MAP(imm_v256_shr_n_u64<20>), + MAP(imm_v256_shr_n_u64<24>), + MAP(imm_v256_shr_n_u64<28>), + MAP(imm_v256_shr_n_u64<32>), + MAP(imm_v256_shr_n_u64<36>), + MAP(imm_v256_shr_n_u64<40>), + MAP(imm_v256_shr_n_u64<44>), + MAP(imm_v256_shr_n_u64<48>), + MAP(imm_v256_shr_n_u64<52>), + MAP(imm_v256_shr_n_u64<56>), + MAP(imm_v256_shr_n_u64<60>), + MAP(imm_v256_shr_n_s64<1>), + MAP(imm_v256_shr_n_s64<4>), + MAP(imm_v256_shr_n_s64<8>), + MAP(imm_v256_shr_n_s64<12>), + MAP(imm_v256_shr_n_s64<16>), + MAP(imm_v256_shr_n_s64<20>), + MAP(imm_v256_shr_n_s64<24>), + MAP(imm_v256_shr_n_s64<28>), + MAP(imm_v256_shr_n_s64<32>), + MAP(imm_v256_shr_n_s64<36>), + MAP(imm_v256_shr_n_s64<40>), + MAP(imm_v256_shr_n_s64<44>), + MAP(imm_v256_shr_n_s64<48>), + MAP(imm_v256_shr_n_s64<52>), + MAP(imm_v256_shr_n_s64<56>), + MAP(imm_v256_shr_n_s64<60>), + MAP(v256_movemask_8), + MAP(v256_zero), + MAP(v256_dup_8), + MAP(v256_dup_16), + MAP(v256_dup_32), + MAP(v256_dup_64), + MAP(v256_low_u32), + MAP(v256_low_v64), + MAP(v256_from_64), + MAP(v256_from_v64), + MAP(v256_ziplo_128), + MAP(v256_ziphi_128), + MAP(v256_unpacklo_u8_s16), + MAP(v256_unpackhi_u8_s16), + MAP(v256_unpacklo_s8_s16), + MAP(v256_unpackhi_s8_s16), + MAP(v256_blend_8), + { nullptr, nullptr, nullptr } }; +#undef MAP + +// Map reference functions to machine tuned functions. Since the +// functions depend on machine tuned types, the non-machine tuned +// instantiations of the test can't refer to these functions directly, +// so we refer to them by name and do the mapping here. 
+void Map(const char *name, fptr *ref, fptr *simd) { + unsigned int i; + for (i = 0; m[i].name && strcmp(name, m[i].name); i++) { + } + + *ref = m[i].ref; + *simd = m[i].simd; +} + +// Used for printing errors in TestSimd1Arg, TestSimd2Args and TestSimd3Args +std::string Print(const uint8_t *a, int size) { + std::string text = "0x"; + for (int i = 0; i < size; i++) { + const uint8_t c = a[!CONFIG_BIG_ENDIAN ? size - 1 - i : i]; + // Same as snprintf(..., ..., "%02x", c) + text += (c >> 4) + '0' + ((c >> 4) > 9) * ('a' - '0' - 10); + text += (c & 15) + '0' + ((c & 15) > 9) * ('a' - '0' - 10); + } + + return text; +} + +// Used in TestSimd1Arg, TestSimd2Args and TestSimd3Args to restrict argument +// ranges +void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) { + switch (maskwidth) { + case 0: { + break; + } + case 8: { + for (int i = 0; i < size; i++) s[i] &= mask; + break; + } + case 16: { + uint16_t *t = reinterpret_cast<uint16_t *>(s); + assert(!(reinterpret_cast<uintptr_t>(s) & 1)); + for (int i = 0; i < size / 2; i++) t[i] &= mask; + break; + } + case 32: { + uint32_t *t = reinterpret_cast<uint32_t *>(s); + assert(!(reinterpret_cast<uintptr_t>(s) & 3)); + for (int i = 0; i < size / 4; i++) t[i] &= mask; + break; + } + case 64: { + uint64_t *t = reinterpret_cast<uint64_t *>(s); + assert(!(reinterpret_cast<uintptr_t>(s) & 7)); + for (int i = 0; i < size / 8; i++) t[i] &= mask; + break; + } + default: { + FAIL() << "Unsupported mask width"; + break; + } + } +} + +// We need some extra load/store functions +void u64_store_aligned(void *p, uint64_t a) { + v64_store_aligned(p, v64_from_64(a)); +} +void s32_store_aligned(void *p, int32_t a) { + u32_store_aligned(p, static_cast<uint32_t>(a)); +} +void s64_store_aligned(void *p, int64_t a) { + v64_store_aligned(p, v64_from_64(static_cast<uint64_t>(a))); +} + +void c_u64_store_aligned(void *p, uint64_t a) { + c_v64_store_aligned(p, c_v64_from_64(a)); +} + +void c_s32_store_aligned(void *p, int32_t a) { 
+ c_u32_store_aligned(p, static_cast<uint32_t>(a)); +} + +void c_s64_store_aligned(void *p, int64_t a) { + c_v64_store_aligned(p, c_v64_from_64(static_cast<uint64_t>(a))); +} + +uint64_t u64_load_aligned(const void *p) { + return v64_u64(v64_load_aligned(p)); +} +uint16_t u16_load_aligned(const void *p) { + return *(reinterpret_cast<const uint16_t *>(p)); +} +uint8_t u8_load_aligned(const void *p) { + return *(reinterpret_cast<const uint8_t *>(p)); +} + +uint64_t c_u64_load_aligned(const void *p) { + return c_v64_u64(c_v64_load_aligned(p)); +} +uint16_t c_u16_load_aligned(const void *p) { + return *(reinterpret_cast<const uint16_t *>(p)); +} +uint8_t c_u8_load_aligned(const void *p) { + return *(reinterpret_cast<const uint8_t *>(p)); +} + +// CompareSimd1Arg, CompareSimd2Args and CompareSimd3Args compare +// intrinsics taking 1, 2 or 3 arguments respectively with their +// corresponding C reference. Ideally, the loads and stores should +// have gone into the template parameter list, but v64 and v128 could +// be typedef'ed to the same type (which is the case on x86) and then +// we can't instantiate both v64 and v128, so the function return and +// argument types, including the always differing types in the C +// equivalent are used instead. The function arguments must be void +// pointers and then go through a cast to avoid matching errors in the +// branches eliminated by the typeid tests in the calling function. 
+template <typename Ret, typename Arg, typename CRet, typename CArg> +int CompareSimd1Arg(fptr store, fptr load, fptr simd, void *d, fptr c_store, + fptr c_load, fptr c_simd, void *ref_d, const void *a) { + void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store; + Arg (*const my_load)(const void *) = (Arg(*const)(const void *))load; + Ret (*const my_simd)(Arg) = (Ret(*const)(Arg))simd; + void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store; + CArg (*const my_c_load)(const void *) = (CArg(*const)(const void *))c_load; + CRet (*const my_c_simd)(CArg) = (CRet(*const)(CArg))c_simd; + + // Call reference and intrinsic + my_c_store(ref_d, my_c_simd(my_c_load(a))); + my_store(d, my_simd(my_load(a))); + + // Compare results + return memcmp(ref_d, d, sizeof(CRet)); +} + +template <typename Ret, typename Arg1, typename Arg2, typename CRet, + typename CArg1, typename CArg2> +int CompareSimd2Args(fptr store, fptr load1, fptr load2, fptr simd, void *d, + fptr c_store, fptr c_load1, fptr c_load2, fptr c_simd, + void *ref_d, const void *a, const void *b) { + void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store; + Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1; + Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2; + Ret (*const my_simd)(Arg1, Arg2) = (Ret(*const)(Arg1, Arg2))simd; + void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store; + CArg1 (*const my_c_load1)(const void *) = + (CArg1(*const)(const void *))c_load1; + CArg2 (*const my_c_load2)(const void *) = + (CArg2(*const)(const void *))c_load2; + CRet (*const my_c_simd)(CArg1, CArg2) = (CRet(*const)(CArg1, CArg2))c_simd; + + // Call reference and intrinsic + my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b))); + my_store(d, my_simd(my_load1(a), my_load2(b))); + + // Compare results + return memcmp(ref_d, d, sizeof(CRet)); +} + +template <typename Ret, typename Arg1, typename Arg2, 
typename Arg3, + typename CRet, typename CArg1, typename CArg2, typename CArg3> +int CompareSimd3Args(fptr store, fptr load1, fptr load2, fptr load3, fptr simd, + void *d, fptr c_store, fptr c_load1, fptr c_load2, + fptr c_load3, fptr c_simd, void *ref_d, const void *a, + const void *b, const void *c) { + void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store; + Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1; + Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2; + Arg3 (*const my_load3)(const void *) = (Arg3(*const)(const void *))load3; + Ret (*const my_simd)(Arg1, Arg2, Arg3) = (Ret(*const)(Arg1, Arg2, Arg3))simd; + void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store; + CArg1 (*const my_c_load1)(const void *) = + (CArg1(*const)(const void *))c_load1; + CArg2 (*const my_c_load2)(const void *) = + (CArg2(*const)(const void *))c_load2; + CArg3 (*const my_c_load3)(const void *) = + (CArg3(*const)(const void *))c_load3; + CRet (*const my_c_simd)(CArg1, CArg2, CArg3) = + (CRet(*const)(CArg1, CArg2, CArg3))c_simd; + + // Call reference and intrinsic + my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b), my_c_load3(c))); + my_store(d, my_simd(my_load1(a), my_load2(b), my_load3(c))); + + // Compare results + return memcmp(ref_d, d, sizeof(CRet)); +} + +} // namespace + +template <typename CRet, typename CArg> +void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth, + const char *name) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + fptr ref_simd; + fptr simd; + int error = 0; + DECLARE_ALIGNED(32, uint8_t, s[32]); + DECLARE_ALIGNED(32, uint8_t, d[32]); + DECLARE_ALIGNED(32, uint8_t, ref_d[32]); + assert(sizeof(CArg) <= 32 && sizeof(CRet) <= 32); + memset(ref_d, 0, sizeof(ref_d)); + memset(d, 0, sizeof(d)); + + Map(name, &ref_simd, &simd); + if (simd == nullptr || ref_simd == nullptr) { + FAIL() << "Internal error: Unknown intrinsic function " << name; + } 
+ for (unsigned int count = 0; + count < iterations && !error && !testing::Test::HasFailure(); count++) { + for (unsigned int c = 0; c < sizeof(CArg); c++) s[c] = rnd.Rand8(); + + if (maskwidth) { + SetMask(s, sizeof(CArg), mask, maskwidth); + } + + if (typeid(CRet) == typeid(c_v64) && typeid(CArg) == typeid(c_v64)) { + // V64_V64 + error = CompareSimd1Arg<v64, v64, c_v64, c_v64>( + reinterpret_cast<fptr>(v64_store_aligned), + reinterpret_cast<fptr>(v64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v64_store_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v64) && + typeid(CArg) == typeid(uint8_t)) { + // V64_U8 + error = CompareSimd1Arg<v64, uint8_t, c_v64, uint8_t>( + reinterpret_cast<fptr>(v64_store_aligned), + reinterpret_cast<fptr>(u8_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v64_store_aligned), + reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v64) && + typeid(CArg) == typeid(uint16_t)) { + // V64_U16 + error = CompareSimd1Arg<v64, uint16_t, c_v64, uint16_t>( + reinterpret_cast<fptr>(v64_store_aligned), + reinterpret_cast<fptr>(u16_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v64_store_aligned), + reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v64) && + typeid(CArg) == typeid(uint32_t)) { + // V64_U32 + error = CompareSimd1Arg<v64, uint32_t, c_v64, uint32_t>( + reinterpret_cast<fptr>(v64_store_aligned), + reinterpret_cast<fptr>(u32_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v64_store_aligned), + reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(uint64_t) && + typeid(CArg) == typeid(c_v64)) { + // U64_V64 + error = CompareSimd1Arg<uint64_t, v64, uint64_t, c_v64>( + reinterpret_cast<fptr>(u64_store_aligned), + reinterpret_cast<fptr>(v64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_u64_store_aligned), + 
reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(int64_t) && + typeid(CArg) == typeid(c_v64)) { + // S64_V64 + error = CompareSimd1Arg<int64_t, v64, int64_t, c_v64>( + reinterpret_cast<fptr>(s64_store_aligned), + reinterpret_cast<fptr>(v64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_s64_store_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(uint32_t) && + typeid(CArg) == typeid(c_v64)) { + // U32_V64 + error = CompareSimd1Arg<uint32_t, v64, uint32_t, c_v64>( + reinterpret_cast<fptr>(u32_store_aligned), + reinterpret_cast<fptr>(v64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_u32_store_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(int32_t) && + typeid(CArg) == typeid(c_v64)) { + // S32_V64 + error = CompareSimd1Arg<int32_t, v64, int32_t, c_v64>( + reinterpret_cast<fptr>(s32_store_aligned), + reinterpret_cast<fptr>(v64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_s32_store_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(uint32_t) && + typeid(CArg) == typeid(c_v128)) { + // U32_V128 + error = CompareSimd1Arg<uint32_t, v128, uint32_t, c_v128>( + reinterpret_cast<fptr>(u32_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), simd, d, + reinterpret_cast<fptr>(c_u32_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(uint64_t) && + typeid(CArg) == typeid(c_v128)) { + // U64_V128 + error = CompareSimd1Arg<uint64_t, v128, uint64_t, c_v128>( + reinterpret_cast<fptr>(u64_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), simd, d, + reinterpret_cast<fptr>(c_u64_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(uint64_t) && + typeid(CArg) == typeid(c_v256)) { + 
// U64_V256 + error = CompareSimd1Arg<uint64_t, v256, uint64_t, c_v256>( + reinterpret_cast<fptr>(u64_store_aligned), + reinterpret_cast<fptr>(v256_load_aligned), simd, d, + reinterpret_cast<fptr>(c_u64_store_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v64) && + typeid(CArg) == typeid(c_v128)) { + // V64_V128 + error = CompareSimd1Arg<v64, v128, c_v64, c_v128>( + reinterpret_cast<fptr>(v64_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v64_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v128) && + typeid(CArg) == typeid(c_v128)) { + // V128_V128 + error = CompareSimd1Arg<v128, v128, c_v128, c_v128>( + reinterpret_cast<fptr>(v128_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v128_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v128) && + typeid(CArg) == typeid(c_v64)) { + // V128_V64 + error = CompareSimd1Arg<v128, v64, c_v128, c_v64>( + reinterpret_cast<fptr>(v128_store_aligned), + reinterpret_cast<fptr>(v64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v128_store_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v128) && + typeid(CArg) == typeid(uint8_t)) { + // V128_U8 + error = CompareSimd1Arg<v128, uint8_t, c_v128, uint8_t>( + reinterpret_cast<fptr>(v128_store_aligned), + reinterpret_cast<fptr>(u8_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v128_store_aligned), + reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v128) && + typeid(CArg) == typeid(uint16_t)) { + // V128_U16 + error = CompareSimd1Arg<v128, uint16_t, c_v128, uint16_t>( + reinterpret_cast<fptr>(v128_store_aligned), + reinterpret_cast<fptr>(u16_load_aligned), 
simd, d, + reinterpret_cast<fptr>(c_v128_store_aligned), + reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v128) && + typeid(CArg) == typeid(uint32_t)) { + // V128_U32 + error = CompareSimd1Arg<v128, uint32_t, c_v128, uint32_t>( + reinterpret_cast<fptr>(v128_store_aligned), + reinterpret_cast<fptr>(u32_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v128_store_aligned), + reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v128) && + typeid(CArg) == typeid(uint64_t)) { + // V128_U64 + error = CompareSimd1Arg<v128, uint64_t, c_v128, uint64_t>( + reinterpret_cast<fptr>(v128_store_aligned), + reinterpret_cast<fptr>(u64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v128_store_aligned), + reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v256) && + typeid(CArg) == typeid(c_v256)) { + // V256_V256 + error = CompareSimd1Arg<v256, v256, c_v256, c_v256>( + reinterpret_cast<fptr>(v256_store_aligned), + reinterpret_cast<fptr>(v256_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v256_store_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v256) && + typeid(CArg) == typeid(c_v128)) { + // V256_V128 + error = CompareSimd1Arg<v256, v128, c_v256, c_v128>( + reinterpret_cast<fptr>(v256_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v256_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v256) && + typeid(CArg) == typeid(uint8_t)) { + // V256_U8 + error = CompareSimd1Arg<v256, uint8_t, c_v256, uint8_t>( + reinterpret_cast<fptr>(v256_store_aligned), + reinterpret_cast<fptr>(u8_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v256_store_aligned), + reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s); + } else if 
(typeid(CRet) == typeid(c_v256) && + typeid(CArg) == typeid(uint16_t)) { + // V256_U16 + error = CompareSimd1Arg<v256, uint16_t, c_v256, uint16_t>( + reinterpret_cast<fptr>(v256_store_aligned), + reinterpret_cast<fptr>(u16_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v256_store_aligned), + reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v256) && + typeid(CArg) == typeid(uint32_t)) { + // V256_U32 + error = CompareSimd1Arg<v256, uint32_t, c_v256, uint32_t>( + reinterpret_cast<fptr>(v256_store_aligned), + reinterpret_cast<fptr>(u32_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v256_store_aligned), + reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v256) && + typeid(CArg) == typeid(uint64_t)) { + // V256_U64 + error = CompareSimd1Arg<v256, uint64_t, c_v256, uint64_t>( + reinterpret_cast<fptr>(v256_store_aligned), + reinterpret_cast<fptr>(u64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v256_store_aligned), + reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(uint32_t) && + typeid(CArg) == typeid(c_v256)) { + // U32_V256 + error = CompareSimd1Arg<uint32_t, v256, uint32_t, c_v256>( + reinterpret_cast<fptr>(u32_store_aligned), + reinterpret_cast<fptr>(v256_load_aligned), simd, d, + reinterpret_cast<fptr>(c_u32_store_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s); + } else if (typeid(CRet) == typeid(c_v64) && + typeid(CArg) == typeid(c_v256)) { + // V64_V256 + error = CompareSimd1Arg<v64, v256, c_v64, c_v256>( + reinterpret_cast<fptr>(v64_store_aligned), + reinterpret_cast<fptr>(v256_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v64_store_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s); + } else { + FAIL() << "Internal error: Unknown intrinsic function " + << typeid(CRet).name() << " " << name << "(" << typeid(CArg).name() + << ")"; + } 
+ } + + EXPECT_EQ(0, error) << "Error: mismatch for " << name << "(" + << Print(s, sizeof(CArg)) << ") -> " + << Print(d, sizeof(CRet)) << " (simd), " + << Print(ref_d, sizeof(CRet)) << " (ref)"; +} + +template <typename CRet, typename CArg1, typename CArg2> +void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth, + const char *name) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + fptr ref_simd; + fptr simd; + int error = 0; + DECLARE_ALIGNED(32, uint8_t, s1[32]); + DECLARE_ALIGNED(32, uint8_t, s2[32]); + DECLARE_ALIGNED(32, uint8_t, d[32]); + DECLARE_ALIGNED(32, uint8_t, ref_d[32]); + assert(sizeof(CArg1) <= 32 && sizeof(CArg2) <= 32 && sizeof(CRet) <= 32); + memset(ref_d, 0, sizeof(ref_d)); + memset(d, 0, sizeof(d)); + + Map(name, &ref_simd, &simd); + if (simd == nullptr || ref_simd == nullptr) { + FAIL() << "Internal error: Unknown intrinsic function " << name; + } + + for (unsigned int count = 0; + count < iterations && !error && !testing::Test::HasFailure(); count++) { + for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8(); + + for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8(); + + if (maskwidth) SetMask(s2, sizeof(CArg2), mask, maskwidth); + + if (typeid(CRet) == typeid(c_v64) && typeid(CArg1) == typeid(c_v64) && + typeid(CArg2) == typeid(c_v64)) { + // V64_V64V64 + error = CompareSimd2Args<v64, v64, v64, c_v64, c_v64, c_v64>( + reinterpret_cast<fptr>(v64_store_aligned), + reinterpret_cast<fptr>(v64_load_aligned), + reinterpret_cast<fptr>(v64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v64_store_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(c_v64) && + typeid(CArg1) == typeid(uint32_t) && + typeid(CArg2) == typeid(uint32_t)) { + // V64_U32U32 + error = + CompareSimd2Args<v64, uint32_t, uint32_t, c_v64, uint32_t, uint32_t>( + 
reinterpret_cast<fptr>(v64_store_aligned), + reinterpret_cast<fptr>(u32_load_aligned), + reinterpret_cast<fptr>(u32_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v64_store_aligned), + reinterpret_cast<fptr>(c_u32_load_aligned), + reinterpret_cast<fptr>(c_u32_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(uint32_t) && + typeid(CArg1) == typeid(c_v64) && + typeid(CArg2) == typeid(c_v64)) { + // U32_V64V64 + error = CompareSimd2Args<uint32_t, v64, v64, uint32_t, c_v64, c_v64>( + reinterpret_cast<fptr>(u32_store_aligned), + reinterpret_cast<fptr>(v64_load_aligned), + reinterpret_cast<fptr>(v64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_u32_store_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(int64_t) && + typeid(CArg1) == typeid(c_v64) && + typeid(CArg2) == typeid(c_v64)) { + // S64_V64V64 + error = CompareSimd2Args<int64_t, v64, v64, int64_t, c_v64, c_v64>( + reinterpret_cast<fptr>(s64_store_aligned), + reinterpret_cast<fptr>(v64_load_aligned), + reinterpret_cast<fptr>(v64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_s64_store_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(c_v64) && + typeid(CArg1) == typeid(c_v64) && + typeid(CArg2) == typeid(uint32_t)) { + // V64_V64U32 + error = CompareSimd2Args<v64, v64, uint32_t, c_v64, c_v64, uint32_t>( + reinterpret_cast<fptr>(v64_store_aligned), + reinterpret_cast<fptr>(v64_load_aligned), + reinterpret_cast<fptr>(u32_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v64_store_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), + reinterpret_cast<fptr>(c_u32_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(c_v128) 
&& + typeid(CArg1) == typeid(c_v128) && + typeid(CArg2) == typeid(c_v128)) { + // V128_V128V128 + error = CompareSimd2Args<v128, v128, v128, c_v128, c_v128, c_v128>( + reinterpret_cast<fptr>(v128_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), + reinterpret_cast<fptr>(v128_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v128_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(uint32_t) && + typeid(CArg1) == typeid(c_v128) && + typeid(CArg2) == typeid(c_v128)) { + // U32_V128V128 + error = CompareSimd2Args<uint32_t, v128, v128, uint32_t, c_v128, c_v128>( + reinterpret_cast<fptr>(u32_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), + reinterpret_cast<fptr>(v128_load_aligned), simd, d, + reinterpret_cast<fptr>(c_u32_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(uint64_t) && + typeid(CArg1) == typeid(c_v128) && + typeid(CArg2) == typeid(c_v128)) { + // U64_V128V128 + error = CompareSimd2Args<uint64_t, v128, v128, uint64_t, c_v128, c_v128>( + reinterpret_cast<fptr>(u64_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), + reinterpret_cast<fptr>(v128_load_aligned), simd, d, + reinterpret_cast<fptr>(c_u64_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(int64_t) && + typeid(CArg1) == typeid(c_v128) && + typeid(CArg2) == typeid(c_v128)) { + // S64_V128V128 + error = CompareSimd2Args<int64_t, v128, v128, int64_t, c_v128, c_v128>( + reinterpret_cast<fptr>(s64_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), + reinterpret_cast<fptr>(v128_load_aligned), simd, d, + 
reinterpret_cast<fptr>(c_s64_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(c_v128) && + typeid(CArg1) == typeid(uint64_t) && + typeid(CArg2) == typeid(uint64_t)) { + // V128_U64U64 + error = CompareSimd2Args<v128, uint64_t, uint64_t, c_v128, uint64_t, + uint64_t>( + reinterpret_cast<fptr>(v128_store_aligned), + reinterpret_cast<fptr>(u64_load_aligned), + reinterpret_cast<fptr>(u64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v128_store_aligned), + reinterpret_cast<fptr>(c_u64_load_aligned), + reinterpret_cast<fptr>(c_u64_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(c_v128) && + typeid(CArg1) == typeid(c_v64) && + typeid(CArg2) == typeid(c_v64)) { + // V128_V64V64 + error = CompareSimd2Args<v128, v64, v64, c_v128, c_v64, c_v64>( + reinterpret_cast<fptr>(v128_store_aligned), + reinterpret_cast<fptr>(v64_load_aligned), + reinterpret_cast<fptr>(v64_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v128_store_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), + reinterpret_cast<fptr>(c_v64_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(c_v128) && + typeid(CArg1) == typeid(c_v128) && + typeid(CArg2) == typeid(uint32_t)) { + // V128_V128U32 + error = CompareSimd2Args<v128, v128, uint32_t, c_v128, c_v128, uint32_t>( + reinterpret_cast<fptr>(v128_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), + reinterpret_cast<fptr>(u32_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v128_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(c_u32_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(c_v256) && + typeid(CArg1) == typeid(c_v256) && + typeid(CArg2) == typeid(c_v256)) { + // V256_V256V256 + error = 
CompareSimd2Args<v256, v256, v256, c_v256, c_v256, c_v256>( + reinterpret_cast<fptr>(v256_store_aligned), + reinterpret_cast<fptr>(v256_load_aligned), + reinterpret_cast<fptr>(v256_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v256_store_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(uint64_t) && + typeid(CArg1) == typeid(c_v256) && + typeid(CArg2) == typeid(c_v256)) { + // U64_V256V256 + error = CompareSimd2Args<uint64_t, v256, v256, uint64_t, c_v256, c_v256>( + reinterpret_cast<fptr>(u64_store_aligned), + reinterpret_cast<fptr>(v256_load_aligned), + reinterpret_cast<fptr>(v256_load_aligned), simd, d, + reinterpret_cast<fptr>(c_u64_store_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(int64_t) && + typeid(CArg1) == typeid(c_v256) && + typeid(CArg2) == typeid(c_v256)) { + // S64_V256V256 + error = CompareSimd2Args<int64_t, v256, v256, int64_t, c_v256, c_v256>( + reinterpret_cast<fptr>(s64_store_aligned), + reinterpret_cast<fptr>(v256_load_aligned), + reinterpret_cast<fptr>(v256_load_aligned), simd, d, + reinterpret_cast<fptr>(c_s64_store_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(uint32_t) && + typeid(CArg1) == typeid(c_v256) && + typeid(CArg2) == typeid(c_v256)) { + // U32_V256V256 + error = CompareSimd2Args<uint32_t, v256, v256, uint32_t, c_v256, c_v256>( + reinterpret_cast<fptr>(u32_store_aligned), + reinterpret_cast<fptr>(v256_load_aligned), + reinterpret_cast<fptr>(v256_load_aligned), simd, d, + reinterpret_cast<fptr>(c_u32_store_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), + 
reinterpret_cast<fptr>(c_v256_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(c_v256) && + typeid(CArg1) == typeid(c_v128) && + typeid(CArg2) == typeid(c_v128)) { + // V256_V128V128 + error = CompareSimd2Args<v256, v128, v128, c_v256, c_v128, c_v128>( + reinterpret_cast<fptr>(v256_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), + reinterpret_cast<fptr>(v128_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v256_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + } else if (typeid(CRet) == typeid(c_v256) && + typeid(CArg1) == typeid(c_v256) && + typeid(CArg2) == typeid(uint32_t)) { + // V256_V256U32 + error = CompareSimd2Args<v256, v256, uint32_t, c_v256, c_v256, uint32_t>( + reinterpret_cast<fptr>(v256_store_aligned), + reinterpret_cast<fptr>(v256_load_aligned), + reinterpret_cast<fptr>(u32_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v256_store_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), + reinterpret_cast<fptr>(c_u32_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2); + + } else { + FAIL() << "Internal error: Unknown intrinsic function " + << typeid(CRet).name() << " " << name << "(" + << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ")"; + } + } + + EXPECT_EQ(0, error) << "Error: mismatch for " << name << "(" + << Print(s1, sizeof(CArg1)) << ", " + << Print(s2, sizeof(CArg2)) << ") -> " + << Print(d, sizeof(CRet)) << " (simd), " + << Print(ref_d, sizeof(CRet)) << " (ref)"; +} + +template <typename CRet, typename CArg1, typename CArg2, typename CArg3> +void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth, + const char *name) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + fptr ref_simd; + fptr simd; + int error = 0; + DECLARE_ALIGNED(32, uint8_t, s1[32]); + DECLARE_ALIGNED(32, uint8_t, s2[32]); + DECLARE_ALIGNED(32, 
uint8_t, s3[32]); + DECLARE_ALIGNED(32, uint8_t, d[32]); + DECLARE_ALIGNED(32, uint8_t, ref_d[32]); + assert(sizeof(CArg1) <= 32 && sizeof(CArg2) <= 32 && sizeof(CArg3) <= 32 && + sizeof(CRet) <= 32); + memset(ref_d, 0, sizeof(ref_d)); + memset(d, 0, sizeof(d)); + + Map(name, &ref_simd, &simd); + if (simd == nullptr || ref_simd == nullptr) { + FAIL() << "Internal error: Unknown intrinsic function " << name; + } + + for (unsigned int count = 0; + count < iterations && !error && !testing::Test::HasFailure(); count++) { + for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8(); + + for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8(); + + for (unsigned int c = 0; c < sizeof(CArg3); c++) s3[c] = rnd.Rand8(); + + if (maskwidth) SetMask(s3, sizeof(CArg3), mask, maskwidth); + + if (typeid(CRet) == typeid(c_v128) && typeid(CArg1) == typeid(c_v128) && + typeid(CArg2) == typeid(c_v128) && typeid(CArg3) == typeid(c_v128)) { + // V128_V128V128V128 + error = CompareSimd3Args<v128, v128, v128, v128, c_v128, c_v128, c_v128, + c_v128>( + reinterpret_cast<fptr>(v128_store_aligned), + reinterpret_cast<fptr>(v128_load_aligned), + reinterpret_cast<fptr>(v128_load_aligned), + reinterpret_cast<fptr>(v128_load_aligned), simd, d, + reinterpret_cast<fptr>(c_v128_store_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(c_v128_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3); + } else if (typeid(CRet) == typeid(c_v256) && + typeid(CArg1) == typeid(c_v256) && + typeid(CArg2) == typeid(c_v256) && + typeid(CArg3) == typeid(c_v256)) { + // V256_V256V256V256 + error = CompareSimd3Args<v256, v256, v256, v256, c_v256, c_v256, c_v256, + c_v256>( + reinterpret_cast<fptr>(v256_store_aligned), + reinterpret_cast<fptr>(v256_load_aligned), + reinterpret_cast<fptr>(v256_load_aligned), + reinterpret_cast<fptr>(v256_load_aligned), simd, d, + 
reinterpret_cast<fptr>(c_v256_store_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), + reinterpret_cast<fptr>(c_v256_load_aligned), + reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3); + } else { + FAIL() << "Internal error: Unknown intrinsic function " + << typeid(CRet).name() << " " << name << "(" + << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ", " + << typeid(CArg3).name() << ")"; + } + } + + EXPECT_EQ(0, error) << "Error: mismatch for " << name << "(" + << Print(s1, sizeof(CArg1)) << ", " + << Print(s2, sizeof(CArg2)) << ", " + << Print(s3, sizeof(CArg3)) << ") -> " + << Print(d, sizeof(CRet)) << " (simd), " + << Print(ref_d, sizeof(CRet)) << " (ref)"; +} + +// Instantiations to make the functions callable from other files +template void TestSimd1Arg<c_v64, uint8_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v64, uint16_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v64, uint32_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v64, c_v64>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<uint32_t, c_v64>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<int32_t, c_v64>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<uint64_t, c_v64>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<int64_t, c_v64>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd2Args<c_v64, uint32_t, uint32_t>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<c_v64, c_v64, c_v64>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd2Args<c_v64, c_v64, uint32_t>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<int64_t, c_v64, c_v64>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd2Args<uint32_t, c_v64, 
c_v64>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd1Arg<c_v128, c_v128>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v128, uint8_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v128, uint16_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v128, uint32_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v128, uint64_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v128, c_v64>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<uint32_t, c_v128>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<uint64_t, c_v128>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v64, c_v128>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd2Args<c_v128, c_v128, c_v128>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<c_v128, c_v128, uint32_t>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<c_v128, uint64_t, uint64_t>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<c_v128, c_v64, c_v64>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd2Args<uint64_t, c_v128, c_v128>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(uint32_t, uint32_t, + uint32_t, + const char *); +template void TestSimd1Arg<c_v256, c_v128>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v256, c_v256>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<uint64_t, c_v256>(uint32_t, uint32_t, uint32_t, + const char *); +template void 
TestSimd1Arg<c_v256, uint8_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v256, uint16_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v256, uint32_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v256, uint64_t>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<uint32_t, c_v256>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd1Arg<c_v64, c_v256>(uint32_t, uint32_t, uint32_t, + const char *); +template void TestSimd2Args<c_v256, c_v128, c_v128>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<c_v256, c_v256, c_v256>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<c_v256, c_v256, uint32_t>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<uint64_t, c_v256, c_v256>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<int64_t, c_v256, c_v256>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd2Args<uint32_t, c_v256, c_v256>(uint32_t, uint32_t, + uint32_t, const char *); +template void TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(uint32_t, uint32_t, + uint32_t, + const char *); + +} // namespace SIMD_NAMESPACE diff --git a/third_party/aom/test/simd_cmp_sse2.cc b/third_party/aom/test/simd_cmp_sse2.cc new file mode 100644 index 0000000000..f7827a7fa1 --- /dev/null +++ b/third_party/aom/test/simd_cmp_sse2.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +/* SSE2 configuration: compiled only when __OPTIMIZE__ is defined and nonzero, or when neither __GNUC__ nor _DEBUG is defined. It sets ARCH, ARCH_POSTFIX and SIMD_NAMESPACE and then includes test/simd_cmp_impl.h. NOTE(review): presumably that header reads these macros to build the SSE2 variant; confirm. */ +#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \ + (!defined(__GNUC__) && !defined(_DEBUG)) +#define ARCH SSE2 +#define ARCH_POSTFIX(name) name##_sse2 +#define SIMD_NAMESPACE simd_test_sse2 +#include "test/simd_cmp_impl.h" +#endif diff --git a/third_party/aom/test/simd_cmp_sse4.cc b/third_party/aom/test/simd_cmp_sse4.cc new file mode 100644 index 0000000000..3566764b64 --- /dev/null +++ b/third_party/aom/test/simd_cmp_sse4.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +/* SSE4_1 configuration: same guard as the other simd_cmp_*.cc files (optimized build, or neither __GNUC__ nor _DEBUG defined). Names passed through ARCH_POSTFIX get the _sse4_1 suffix and SIMD_NAMESPACE expands to simd_test_sse4_1 before test/simd_cmp_impl.h is included. */ +#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \ + (!defined(__GNUC__) && !defined(_DEBUG)) +#define ARCH SSE4_1 +#define ARCH_POSTFIX(name) name##_sse4_1 +#define SIMD_NAMESPACE simd_test_sse4_1 +#include "test/simd_cmp_impl.h" +#endif diff --git a/third_party/aom/test/simd_cmp_ssse3.cc b/third_party/aom/test/simd_cmp_ssse3.cc new file mode 100644 index 0000000000..57bf135ddb --- /dev/null +++ b/third_party/aom/test/simd_cmp_ssse3.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +/* SSSE3 configuration: compiled only when __OPTIMIZE__ is defined and nonzero, or when neither __GNUC__ nor _DEBUG is defined. Names passed through ARCH_POSTFIX get the _ssse3 suffix and SIMD_NAMESPACE expands to simd_test_ssse3 before test/simd_cmp_impl.h is included. NOTE(review): presumably that header reads these macros to build the SSSE3 variant; confirm. */ +#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \ + (!defined(__GNUC__) && !defined(_DEBUG)) +#define ARCH SSSE3 +#define ARCH_POSTFIX(name) name##_ssse3 +#define SIMD_NAMESPACE simd_test_ssse3 +#include "test/simd_cmp_impl.h" +#endif diff --git a/third_party/aom/test/simd_impl.h b/third_party/aom/test/simd_impl.h new file mode 100644 index 0000000000..b564a7f4b3 --- /dev/null +++ b/third_party/aom/test/simd_impl.h @@ -0,0 +1,1140 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <tuple> + +#define SIMD_CHECK 1 +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/register_state_check.h" +#include "aom_dsp/aom_simd_inline.h" +#include "aom_dsp/simd/v256_intrinsics_c.h" + +namespace SIMD_NAMESPACE { +/* Value-parameterized Google Test fixture shared by every intrinsic test in this header. The test parameter is a tuple; SetUp() copies its elements 0, 1 and 2 into the protected members mask, maskwidth and name so that test bodies can read them as plain members. */ +template <typename param_signature> +class TestIntrinsic : public ::testing::TestWithParam<param_signature> { + public: + ~TestIntrinsic() override = default; + void SetUp() override { + mask = std::get<0>(this->GetParam()); + maskwidth = std::get<1>(this->GetParam()); + name = std::get<2>(this->GetParam()); + } + + protected: + uint32_t mask, maskwidth; + const char *name; +}; + +// Create one fixture typedef per function signature. Every typedef names the same TestIntrinsic instantiation over a (uint32_t, uint32_t, const char *) tuple; only the identifier produced by ARCH_POSTFIX(name) differs. Names read as RETURN_ARGS, e.g. V64_U8 is the fixture whose test calls TestSimd1Arg<c_v64, uint8_t>. +#define TYPEDEF_SIMD(name) \ + typedef TestIntrinsic<std::tuple<uint32_t, uint32_t, const char *> > \ + ARCH_POSTFIX(name) + +TYPEDEF_SIMD(V64_U8); +TYPEDEF_SIMD(V64_U16); +TYPEDEF_SIMD(V64_U32); +TYPEDEF_SIMD(V64_V64); +TYPEDEF_SIMD(U32_V64); +TYPEDEF_SIMD(S32_V64); +TYPEDEF_SIMD(U64_V64); +TYPEDEF_SIMD(S64_V64); +TYPEDEF_SIMD(V64_U32U32); +TYPEDEF_SIMD(V64_V64V64); +TYPEDEF_SIMD(S64_V64V64); +TYPEDEF_SIMD(V64_V64U32); +TYPEDEF_SIMD(U32_V64V64); +TYPEDEF_SIMD(V128_V64); +TYPEDEF_SIMD(V128_V128); +TYPEDEF_SIMD(U32_V128); +TYPEDEF_SIMD(U64_V128); +TYPEDEF_SIMD(V64_V128); +TYPEDEF_SIMD(V128_U8); +TYPEDEF_SIMD(V128_U16); +TYPEDEF_SIMD(V128_U32); +TYPEDEF_SIMD(V128_U64); +TYPEDEF_SIMD(V128_U64U64); +TYPEDEF_SIMD(V128_V64V64); +TYPEDEF_SIMD(V128_V128V128); +TYPEDEF_SIMD(V128_V128V128V128); +TYPEDEF_SIMD(S64_V128V128); +TYPEDEF_SIMD(V128_V128U32); +TYPEDEF_SIMD(U32_V128V128); +TYPEDEF_SIMD(U64_V128V128); +TYPEDEF_SIMD(V256_V128); +TYPEDEF_SIMD(V256_V256); +TYPEDEF_SIMD(U64_V256); +TYPEDEF_SIMD(V256_V128V128); +TYPEDEF_SIMD(V256_V256V256); +TYPEDEF_SIMD(V256_V256V256V256); +TYPEDEF_SIMD(U64_V256V256); +TYPEDEF_SIMD(S64_V256V256); +TYPEDEF_SIMD(V256_V256U32); +TYPEDEF_SIMD(U32_V256V256); +TYPEDEF_SIMD(V256_U8); +TYPEDEF_SIMD(V256_U16); +TYPEDEF_SIMD(V256_U32); 
+TYPEDEF_SIMD(V256_U64); +TYPEDEF_SIMD(U32_V256); +TYPEDEF_SIMD(V64_V256); + +// Google Test allows up to 50 tests per case, so the largest suites are split: each _PartN name is a further alias of the same fixture type, which lets its parameters be instantiated separately. +typedef ARCH_POSTFIX(V64_V64) ARCH_POSTFIX(V64_V64_Part2); +typedef ARCH_POSTFIX(V64_V64V64) ARCH_POSTFIX(V64_V64V64_Part2); +typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part2); +typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part3); +typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part4); +typedef ARCH_POSTFIX(V128_V128V128) ARCH_POSTFIX(V128_V128V128_Part2); +typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part2); +typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part3); +typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part4); +typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part5); +typedef ARCH_POSTFIX(V256_V256V256) ARCH_POSTFIX(V256_V256V256_Part2); + +// These function templates are machine tuned and defined elsewhere; only their declarations appear here. The template arguments give the return type followed by the argument type(s) of the intrinsic selected by name. +template <typename c_ret, typename c_arg> +void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth, + const char *name); + +template <typename c_ret, typename c_arg1, typename c_arg2> +void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth, + const char *name); + +template <typename c_ret, typename c_arg1, typename c_arg2, typename c_arg3> +void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth, + const char *name); +/* Value passed as the iterations argument by every test body in this header. */ +const int kIterations = 65536; + +// Add a macro layer since TEST_P will quote the name so we need to +// expand it first with the prefix. 
+#define MY_TEST_P(name, test) TEST_P(name, test) + +MY_TEST_P(ARCH_POSTFIX(V64_U8), TestIntrinsics) { + TestSimd1Arg<c_v64, uint8_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V64_U16), TestIntrinsics) { + TestSimd1Arg<c_v64, uint16_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V64_U32), TestIntrinsics) { + TestSimd1Arg<c_v64, uint32_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V64_V64), TestIntrinsics) { + TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(U64_V64), TestIntrinsics) { + TestSimd1Arg<uint64_t, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(S64_V64), TestIntrinsics) { + TestSimd1Arg<int64_t, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(U32_V64), TestIntrinsics) { + TestSimd1Arg<uint32_t, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(S32_V64), TestIntrinsics) { + TestSimd1Arg<int32_t, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V64_U32U32), TestIntrinsics) { + TestSimd2Args<c_v64, uint32_t, uint32_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V64_V64V64), TestIntrinsics) { + TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(S64_V64V64), TestIntrinsics) { + TestSimd2Args<int64_t, c_v64, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(U32_V64V64), TestIntrinsics) { + TestSimd2Args<uint32_t, c_v64, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V64_V64U32), TestIntrinsics) { + TestSimd2Args<c_v64, c_v64, uint32_t>(kIterations, mask, maskwidth, name); +} + +// Google Test allows up to 50 tests per case, so split the largest +MY_TEST_P(ARCH_POSTFIX(V64_V64_Part2), TestIntrinsics) { + TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V64_V64V64_Part2), TestIntrinsics) { + 
TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(U32_V128), TestIntrinsics) { + TestSimd1Arg<uint32_t, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(U64_V128), TestIntrinsics) { + TestSimd1Arg<uint64_t, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V64_V128), TestIntrinsics) { + TestSimd1Arg<c_v64, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_V128), TestIntrinsics) { + TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_U8), TestIntrinsics) { + TestSimd1Arg<c_v128, uint8_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_U16), TestIntrinsics) { + TestSimd1Arg<c_v128, uint16_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_U32), TestIntrinsics) { + TestSimd1Arg<c_v128, uint32_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_U64), TestIntrinsics) { + TestSimd1Arg<c_v128, uint64_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_V64), TestIntrinsics) { + TestSimd1Arg<c_v128, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_V128V128), TestIntrinsics) { + TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_V128V128V128), TestIntrinsics) { + TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, + name); +} + +MY_TEST_P(ARCH_POSTFIX(U32_V128V128), TestIntrinsics) { + TestSimd2Args<uint32_t, c_v128, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(U64_V128V128), TestIntrinsics) { + TestSimd2Args<uint64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(S64_V128V128), TestIntrinsics) { + TestSimd2Args<int64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_U64U64), TestIntrinsics) { + 
TestSimd2Args<c_v128, uint64_t, uint64_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_V64V64), TestIntrinsics) { + TestSimd2Args<c_v128, c_v64, c_v64>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_V128U32), TestIntrinsics) { + TestSimd2Args<c_v128, c_v128, uint32_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_V128V128_Part2), TestIntrinsics) { + TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_V128_Part2), TestIntrinsics) { + TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_V128_Part3), TestIntrinsics) { + TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V128_V128_Part4), TestIntrinsics) { + TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(U64_V256), TestIntrinsics) { + TestSimd1Arg<uint64_t, c_v256>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_V256), TestIntrinsics) { + TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_V128), TestIntrinsics) { + TestSimd1Arg<c_v256, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_V256V256), TestIntrinsics) { + TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_V256V256V256), TestIntrinsics) { + TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, + name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_V128V128), TestIntrinsics) { + TestSimd2Args<c_v256, c_v128, c_v128>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(U32_V256V256), TestIntrinsics) { + TestSimd2Args<uint32_t, c_v256, c_v256>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(U64_V256V256), TestIntrinsics) { + TestSimd2Args<uint64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name); 
+} + +MY_TEST_P(ARCH_POSTFIX(S64_V256V256), TestIntrinsics) { + TestSimd2Args<int64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_V256V256_Part2), TestIntrinsics) { + TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_V256U32), TestIntrinsics) { + TestSimd2Args<c_v256, c_v256, uint32_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_V256_Part2), TestIntrinsics) { + TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_V256_Part3), TestIntrinsics) { + TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_V256_Part4), TestIntrinsics) { + TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_V256_Part5), TestIntrinsics) { + TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_U8), TestIntrinsics) { + TestSimd1Arg<c_v256, uint8_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_U16), TestIntrinsics) { + TestSimd1Arg<c_v256, uint16_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_U32), TestIntrinsics) { + TestSimd1Arg<c_v256, uint32_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V256_U64), TestIntrinsics) { + TestSimd1Arg<c_v256, uint64_t>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(U32_V256), TestIntrinsics) { + TestSimd1Arg<uint32_t, c_v256>(kIterations, mask, maskwidth, name); +} + +MY_TEST_P(ARCH_POSTFIX(V64_V256), TestIntrinsics) { + TestSimd1Arg<c_v64, c_v256>(kIterations, mask, maskwidth, name); +} + +// Add a macro layer since INSTANTIATE_TEST_SUITE_P will quote the name +// so we need to expand it first with the prefix +#define INSTANTIATE(name, type, ...) 
\ + INSTANTIATE_TEST_SUITE_P(name, type, ::testing::Values(__VA_ARGS__)) + +#define SIMD_TUPLE(name, mask, maskwidth) \ + std::make_tuple(mask, maskwidth, static_cast<const char *>(#name)) + +INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64V64), SIMD_TUPLE(v64_sad_u8, 0U, 0U), + SIMD_TUPLE(v64_ssd_u8, 0U, 0U)); + +INSTANTIATE( + ARCH, ARCH_POSTFIX(V64_V64V64), SIMD_TUPLE(v64_add_8, 0U, 0U), + SIMD_TUPLE(v64_add_16, 0U, 0U), SIMD_TUPLE(v64_sadd_s16, 0U, 0U), + SIMD_TUPLE(v64_add_32, 0U, 0U), SIMD_TUPLE(v64_sub_8, 0U, 0U), + SIMD_TUPLE(v64_ssub_u8, 0U, 0U), SIMD_TUPLE(v64_ssub_s8, 0U, 0U), + SIMD_TUPLE(v64_sub_16, 0U, 0U), SIMD_TUPLE(v64_ssub_s16, 0U, 0U), + SIMD_TUPLE(v64_ssub_u16, 0U, 0U), SIMD_TUPLE(v64_sub_32, 0U, 0U), + SIMD_TUPLE(v64_ziplo_8, 0U, 0U), SIMD_TUPLE(v64_ziphi_8, 0U, 0U), + SIMD_TUPLE(v64_ziplo_16, 0U, 0U), SIMD_TUPLE(v64_ziphi_16, 0U, 0U), + SIMD_TUPLE(v64_ziplo_32, 0U, 0U), SIMD_TUPLE(v64_ziphi_32, 0U, 0U), + SIMD_TUPLE(v64_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v64_pack_s16_u8, 0U, 0U), + SIMD_TUPLE(v64_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v64_unziphi_8, 0U, 0U), + SIMD_TUPLE(v64_unziplo_8, 0U, 0U), SIMD_TUPLE(v64_unziphi_16, 0U, 0U), + SIMD_TUPLE(v64_unziplo_16, 0U, 0U), SIMD_TUPLE(v64_or, 0U, 0U), + SIMD_TUPLE(v64_xor, 0U, 0U), SIMD_TUPLE(v64_and, 0U, 0U), + SIMD_TUPLE(v64_andn, 0U, 0U), SIMD_TUPLE(v64_mullo_s16, 0U, 0U), + SIMD_TUPLE(v64_mulhi_s16, 0U, 0U), SIMD_TUPLE(v64_mullo_s32, 0U, 0U), + SIMD_TUPLE(v64_madd_s16, 0U, 0U), SIMD_TUPLE(v64_madd_us8, 0U, 0U), + SIMD_TUPLE(v64_avg_u8, 0U, 0U), SIMD_TUPLE(v64_rdavg_u8, 0U, 0U), + SIMD_TUPLE(v64_avg_u16, 0U, 0U), SIMD_TUPLE(v64_min_u8, 0U, 0U), + SIMD_TUPLE(v64_max_u8, 0U, 0U), SIMD_TUPLE(v64_min_s8, 0U, 0U), + SIMD_TUPLE(v64_max_s8, 0U, 0U), SIMD_TUPLE(v64_min_s16, 0U, 0U), + SIMD_TUPLE(v64_max_s16, 0U, 0U), SIMD_TUPLE(v64_cmpgt_s8, 0U, 0U), + SIMD_TUPLE(v64_cmplt_s8, 0U, 0U), SIMD_TUPLE(v64_cmpeq_8, 0U, 0U), + SIMD_TUPLE(v64_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v64_cmplt_s16, 0U, 0U), + SIMD_TUPLE(v64_cmpeq_16, 0U, 
0U)); + +INSTANTIATE( + ARCH, ARCH_POSTFIX(V64_V64V64_Part2), SIMD_TUPLE(v64_shuffle_8, 7U, 8U), + SIMD_TUPLE(v64_pack_s32_u16, 0U, 0U), SIMD_TUPLE(v64_rdavg_u16, 0U, 0U), + SIMD_TUPLE(v64_sadd_s8, 0U, 0U), SIMD_TUPLE(v64_sadd_u8, 0U, 0U), + SIMD_TUPLE(imm_v64_align<1>, 0U, 0U), SIMD_TUPLE(imm_v64_align<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_align<3>, 0U, 0U), SIMD_TUPLE(imm_v64_align<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_align<5>, 0U, 0U), SIMD_TUPLE(imm_v64_align<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_align<7>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64), SIMD_TUPLE(v64_abs_s8, 0U, 0U), + SIMD_TUPLE(v64_abs_s16, 0U, 0U), + SIMD_TUPLE(v64_unpacklo_u8_s16, 0U, 0U), + SIMD_TUPLE(v64_unpackhi_u8_s16, 0U, 0U), + SIMD_TUPLE(v64_unpacklo_s8_s16, 0U, 0U), + SIMD_TUPLE(v64_unpackhi_s8_s16, 0U, 0U), + SIMD_TUPLE(v64_unpacklo_u16_s32, 0U, 0U), + SIMD_TUPLE(v64_unpacklo_s16_s32, 0U, 0U), + SIMD_TUPLE(v64_unpackhi_u16_s32, 0U, 0U), + SIMD_TUPLE(v64_unpackhi_s16_s32, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<3>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<5>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_byte<7>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<3>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<5>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_byte<7>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<3>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<5>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_8<7>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<2>, 0U, 0U), + 
SIMD_TUPLE(imm_v64_shr_n_u8<3>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<5>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u8<7>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<3>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<5>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s8<7>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<8>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64_Part2), + SIMD_TUPLE(imm_v64_shl_n_16<10>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_16<14>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<8>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<10>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u16<14>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<2>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<6>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<8>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<10>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s16<14>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<8>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<16>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<20>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<24>, 0U, 0U), + SIMD_TUPLE(imm_v64_shl_n_32<28>, 0U, 0U), + 
SIMD_TUPLE(imm_v64_shr_n_u32<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<8>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<16>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<20>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<24>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_u32<28>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<1>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<4>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<8>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<12>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<16>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<20>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<24>, 0U, 0U), + SIMD_TUPLE(imm_v64_shr_n_s32<28>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64U32), SIMD_TUPLE(v64_shl_8, 7U, 32U), + SIMD_TUPLE(v64_shr_u8, 7U, 32U), SIMD_TUPLE(v64_shr_s8, 7U, 32U), + SIMD_TUPLE(v64_shl_16, 15U, 32U), SIMD_TUPLE(v64_shr_u16, 15U, 32U), + SIMD_TUPLE(v64_shr_s16, 15U, 32U), SIMD_TUPLE(v64_shl_32, 31U, 32U), + SIMD_TUPLE(v64_shr_u32, 31U, 32U), + SIMD_TUPLE(v64_shr_s32, 31U, 32U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V64), SIMD_TUPLE(v64_hadd_u8, 0U, 0U), + SIMD_TUPLE(v64_u64, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64), SIMD_TUPLE(v64_hadd_s16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64), SIMD_TUPLE(v64_low_u32, 0U, 0U), + SIMD_TUPLE(v64_high_u32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(S32_V64), SIMD_TUPLE(v64_low_s32, 0U, 0U), + SIMD_TUPLE(v64_high_s32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64V64), SIMD_TUPLE(v64_dotp_s16, 0U, 0U), + SIMD_TUPLE(v64_dotp_su8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U8), SIMD_TUPLE(v64_dup_8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U16), SIMD_TUPLE(v64_dup_16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32), SIMD_TUPLE(v64_dup_32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32U32), SIMD_TUPLE(v64_from_32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128V128), SIMD_TUPLE(v128_sad_u8, 0U, 
0U), + SIMD_TUPLE(v128_ssd_u8, 0U, 0U), SIMD_TUPLE(v128_sad_u16, 0U, 0U)); +INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128V128), SIMD_TUPLE(v128_ssd_s16, 0U, 0U)); + +INSTANTIATE( + ARCH, ARCH_POSTFIX(V128_V128V128), SIMD_TUPLE(v128_add_8, 0U, 0U), + SIMD_TUPLE(v128_add_16, 0U, 0U), SIMD_TUPLE(v128_sadd_s16, 0U, 0U), + SIMD_TUPLE(v128_add_32, 0U, 0U), SIMD_TUPLE(v128_sub_8, 0U, 0U), + SIMD_TUPLE(v128_ssub_u8, 0U, 0U), SIMD_TUPLE(v128_ssub_s8, 0U, 0U), + SIMD_TUPLE(v128_sub_16, 0U, 0U), SIMD_TUPLE(v128_ssub_s16, 0U, 0U), + SIMD_TUPLE(v128_ssub_u16, 0U, 0U), SIMD_TUPLE(v128_sub_32, 0U, 0U), + SIMD_TUPLE(v128_ziplo_8, 0U, 0U), SIMD_TUPLE(v128_ziphi_8, 0U, 0U), + SIMD_TUPLE(v128_ziplo_16, 0U, 0U), SIMD_TUPLE(v128_ziphi_16, 0U, 0U), + SIMD_TUPLE(v128_ziplo_32, 0U, 0U), SIMD_TUPLE(v128_ziphi_32, 0U, 0U), + SIMD_TUPLE(v128_ziplo_64, 0U, 0U), SIMD_TUPLE(v128_ziphi_64, 0U, 0U), + SIMD_TUPLE(v128_unziphi_8, 0U, 0U), SIMD_TUPLE(v128_unziplo_8, 0U, 0U), + SIMD_TUPLE(v128_unziphi_16, 0U, 0U), SIMD_TUPLE(v128_unziplo_16, 0U, 0U), + SIMD_TUPLE(v128_unziphi_32, 0U, 0U), SIMD_TUPLE(v128_unziplo_32, 0U, 0U), + SIMD_TUPLE(v128_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v128_pack_s16_u8, 0U, 0U), + SIMD_TUPLE(v128_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v128_or, 0U, 0U), + SIMD_TUPLE(v128_xor, 0U, 0U), SIMD_TUPLE(v128_and, 0U, 0U), + SIMD_TUPLE(v128_andn, 0U, 0U), SIMD_TUPLE(v128_mullo_s16, 0U, 0U), + SIMD_TUPLE(v128_mulhi_s16, 0U, 0U), SIMD_TUPLE(v128_mullo_s32, 0U, 0U), + SIMD_TUPLE(v128_madd_s16, 0U, 0U), SIMD_TUPLE(v128_madd_us8, 0U, 0U), + SIMD_TUPLE(v128_avg_u8, 0U, 0U), SIMD_TUPLE(v128_rdavg_u8, 0U, 0U), + SIMD_TUPLE(v128_avg_u16, 0U, 0U), SIMD_TUPLE(v128_min_u8, 0U, 0U), + SIMD_TUPLE(v128_max_u8, 0U, 0U), SIMD_TUPLE(v128_min_s8, 0U, 0U), + SIMD_TUPLE(v128_max_s8, 0U, 0U), SIMD_TUPLE(v128_min_s16, 0U, 0U), + SIMD_TUPLE(v128_max_s16, 0U, 0U), SIMD_TUPLE(v128_cmpgt_s8, 0U, 0U), + SIMD_TUPLE(v128_cmplt_s8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_8, 0U, 0U), + SIMD_TUPLE(v128_cmpgt_s16, 0U, 0U)); + 
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128_Part2), + SIMD_TUPLE(v128_pack_s32_u16, 0U, 0U), + SIMD_TUPLE(v128_rdavg_u16, 0U, 0U), SIMD_TUPLE(v128_add_64, 0U, 0U), + SIMD_TUPLE(v128_sub_64, 0U, 0U), SIMD_TUPLE(v128_sadd_s8, 0U, 0U), + SIMD_TUPLE(v128_sadd_u8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_16, 0U, 0U), + SIMD_TUPLE(v128_cmplt_s16, 0U, 0U), + SIMD_TUPLE(v128_cmplt_s32, 0U, 0U), + SIMD_TUPLE(v128_cmpeq_32, 0U, 0U), + SIMD_TUPLE(v128_cmpgt_s32, 0U, 0U), + SIMD_TUPLE(v128_shuffle_8, 15U, 8U), + SIMD_TUPLE(v128_min_s32, 0U, 0U), SIMD_TUPLE(v128_max_s32, 0U, 0U), + SIMD_TUPLE(imm_v128_align<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<3>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<5>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<9>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<11>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<13>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_align<15>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128V128), + SIMD_TUPLE(v128_blend_8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s8, 0U, 0U), + SIMD_TUPLE(v128_abs_s16, 0U, 0U), SIMD_TUPLE(v128_padd_s16, 0U, 0U), + SIMD_TUPLE(v128_unpacklo_u8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpacklo_s8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpacklo_u16_s32, 0U, 0U), + SIMD_TUPLE(v128_unpacklo_s16_s32, 0U, 0U), + SIMD_TUPLE(v128_unpackhi_u8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpackhi_s8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpackhi_u16_s32, 0U, 0U), + SIMD_TUPLE(v128_unpackhi_s16_s32, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<3>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<4>, 0U, 0U), + 
SIMD_TUPLE(imm_v128_shr_n_byte<5>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<9>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<11>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<13>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_byte<15>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<3>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<5>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<9>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<11>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<13>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_byte<15>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<3>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<5>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_8<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<1>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part2), + SIMD_TUPLE(imm_v128_shr_n_u8<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<5>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u8<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<3>, 0U, 0U), 
+ SIMD_TUPLE(imm_v128_shr_n_s8<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<5>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s8<7>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_16<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u16<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<2>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<6>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<10>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s16<14>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<16>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<20>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<24>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_32<28>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<4>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part3), + SIMD_TUPLE(imm_v128_shr_n_u32<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<16>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<20>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u32<24>, 0U, 0U), + 
SIMD_TUPLE(imm_v128_shr_n_u32<28>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<16>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<20>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<24>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s32<28>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part4), + SIMD_TUPLE(imm_v128_shl_n_64<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<16>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<20>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<24>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<28>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<32>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<36>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<40>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<44>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<48>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<52>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<56>, 0U, 0U), + SIMD_TUPLE(imm_v128_shl_n_64<60>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<4>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<16>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<20>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<24>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<28>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<32>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<36>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<40>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<44>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<48>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<52>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<56>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_u64<60>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<1>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<4>, 0U, 
0U), + SIMD_TUPLE(imm_v128_shr_n_s64<8>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<12>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<16>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<20>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<24>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<28>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<32>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<36>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<40>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<44>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<48>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<52>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<56>, 0U, 0U), + SIMD_TUPLE(imm_v128_shr_n_s64<60>, 0U, 0U), + SIMD_TUPLE(v128_padd_u8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64V64), SIMD_TUPLE(v128_from_v64, 0U, 0U), + SIMD_TUPLE(v128_zip_8, 0U, 0U), SIMD_TUPLE(v128_zip_16, 0U, 0U), + SIMD_TUPLE(v128_zip_32, 0U, 0U), SIMD_TUPLE(v128_mul_s16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64U64), SIMD_TUPLE(v128_from_64, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64), + SIMD_TUPLE(v128_unpack_u8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpack_s8_s16, 0U, 0U), + SIMD_TUPLE(v128_unpack_u16_s32, 0U, 0U), + SIMD_TUPLE(v128_unpack_s16_s32, 0U, 0U)); + +INSTANTIATE( + ARCH, ARCH_POSTFIX(V128_V128U32), SIMD_TUPLE(v128_shl_8, 7U, 32U), + SIMD_TUPLE(v128_shr_u8, 7U, 32U), SIMD_TUPLE(v128_shr_s8, 7U, 32U), + SIMD_TUPLE(v128_shl_16, 15U, 32U), SIMD_TUPLE(v128_shr_u16, 15U, 32U), + SIMD_TUPLE(v128_shr_s16, 15U, 32U), SIMD_TUPLE(v128_shl_32, 31U, 32U), + SIMD_TUPLE(v128_shr_u32, 31U, 32U), SIMD_TUPLE(v128_shr_s32, 31U, 32U), + SIMD_TUPLE(v128_shl_64, 63U, 32U), SIMD_TUPLE(v128_shr_u64, 63U, 32U), + SIMD_TUPLE(v128_shr_s64, 63U, 32U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128), SIMD_TUPLE(v128_low_u32, 0U, 0U), + SIMD_TUPLE(v128_movemask_8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128), SIMD_TUPLE(v128_hadd_u8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V128), SIMD_TUPLE(v128_low_v64, 0U, 0U), + SIMD_TUPLE(v128_high_v64, 
0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U8), SIMD_TUPLE(v128_dup_8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U16), SIMD_TUPLE(v128_dup_16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U32), SIMD_TUPLE(v128_dup_32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64), SIMD_TUPLE(v128_dup_64, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V128V128), SIMD_TUPLE(v128_dotp_s16, 0U, 0U), + SIMD_TUPLE(v128_dotp_s32, 0U, 0U), + SIMD_TUPLE(v128_dotp_su8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256V256), SIMD_TUPLE(v256_sad_u8, 0U, 0U), + SIMD_TUPLE(v256_ssd_u8, 0U, 0U), SIMD_TUPLE(v256_sad_u16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256), SIMD_TUPLE(v256_hadd_u8, 0U, 0U), + SIMD_TUPLE(v256_low_u64, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V256V256), SIMD_TUPLE(v256_dotp_s16, 0U, 0U), + SIMD_TUPLE(v256_dotp_s32, 0U, 0U), + SIMD_TUPLE(v256_dotp_su8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256V256), SIMD_TUPLE(v256_ssd_s16, 0U, 0U)); + +INSTANTIATE( + ARCH, ARCH_POSTFIX(V256_V256V256), SIMD_TUPLE(v256_add_8, 0U, 0U), + SIMD_TUPLE(v256_add_16, 0U, 0U), SIMD_TUPLE(v256_sadd_s16, 0U, 0U), + SIMD_TUPLE(v256_add_32, 0U, 0U), SIMD_TUPLE(v256_sub_8, 0U, 0U), + SIMD_TUPLE(v256_ssub_u8, 0U, 0U), SIMD_TUPLE(v256_ssub_s8, 0U, 0U), + SIMD_TUPLE(v256_sub_16, 0U, 0U), SIMD_TUPLE(v256_ssub_s16, 0U, 0U), + SIMD_TUPLE(v256_ssub_u16, 0U, 0U), SIMD_TUPLE(v256_sub_32, 0U, 0U), + SIMD_TUPLE(v256_ziplo_8, 0U, 0U), SIMD_TUPLE(v256_ziphi_8, 0U, 0U), + SIMD_TUPLE(v256_ziplo_16, 0U, 0U), SIMD_TUPLE(v256_ziphi_16, 0U, 0U), + SIMD_TUPLE(v256_ziplo_32, 0U, 0U), SIMD_TUPLE(v256_ziphi_32, 0U, 0U), + SIMD_TUPLE(v256_ziplo_64, 0U, 0U), SIMD_TUPLE(v256_ziphi_64, 0U, 0U), + SIMD_TUPLE(v256_ziplo_128, 0U, 0U), SIMD_TUPLE(v256_ziphi_128, 0U, 0U), + SIMD_TUPLE(v256_unziphi_8, 0U, 0U), SIMD_TUPLE(v256_unziplo_8, 0U, 0U), + SIMD_TUPLE(v256_unziphi_16, 0U, 0U), SIMD_TUPLE(v256_unziplo_16, 0U, 0U), + SIMD_TUPLE(v256_unziphi_32, 0U, 0U), 
SIMD_TUPLE(v256_unziplo_32, 0U, 0U), + SIMD_TUPLE(v256_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v256_pack_s16_u8, 0U, 0U), + SIMD_TUPLE(v256_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v256_or, 0U, 0U), + SIMD_TUPLE(v256_xor, 0U, 0U), SIMD_TUPLE(v256_and, 0U, 0U), + SIMD_TUPLE(v256_andn, 0U, 0U), SIMD_TUPLE(v256_mullo_s16, 0U, 0U), + SIMD_TUPLE(v256_mulhi_s16, 0U, 0U), SIMD_TUPLE(v256_mullo_s32, 0U, 0U), + SIMD_TUPLE(v256_madd_s16, 0U, 0U), SIMD_TUPLE(v256_madd_us8, 0U, 0U), + SIMD_TUPLE(v256_avg_u8, 0U, 0U), SIMD_TUPLE(v256_rdavg_u8, 0U, 0U), + SIMD_TUPLE(v256_avg_u16, 0U, 0U), SIMD_TUPLE(v256_min_u8, 0U, 0U), + SIMD_TUPLE(v256_max_u8, 0U, 0U), SIMD_TUPLE(v256_min_s8, 0U, 0U), + SIMD_TUPLE(v256_max_s8, 0U, 0U), SIMD_TUPLE(v256_min_s16, 0U, 0U), + SIMD_TUPLE(v256_max_s16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s8, 0U, 0U), + SIMD_TUPLE(v256_cmplt_s8, 0U, 0U)); + +INSTANTIATE( + ARCH, ARCH_POSTFIX(V256_V256V256_Part2), SIMD_TUPLE(v256_cmpeq_8, 0U, 0U), + SIMD_TUPLE(v256_min_s32, 0U, 0U), SIMD_TUPLE(v256_max_s32, 0U, 0U), + SIMD_TUPLE(v256_add_64, 0U, 0U), SIMD_TUPLE(v256_sub_64, 0U, 0U), + SIMD_TUPLE(v256_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v256_cmplt_s16, 0U, 0U), + SIMD_TUPLE(v256_cmpeq_16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s32, 0U, 0U), + SIMD_TUPLE(v256_cmplt_s32, 0U, 0U), SIMD_TUPLE(v256_cmpeq_32, 0U, 0U), + SIMD_TUPLE(v256_shuffle_8, 31U, 8U), SIMD_TUPLE(v256_pshuffle_8, 15U, 8U), + SIMD_TUPLE(imm_v256_align<1>, 0U, 0U), SIMD_TUPLE(v256_sadd_s8, 0U, 0U), + SIMD_TUPLE(v256_sadd_u8, 0U, 0U), SIMD_TUPLE(v256_pack_s32_u16, 0U, 0U), + SIMD_TUPLE(v256_rdavg_u16, 0U, 0U), SIMD_TUPLE(imm_v256_align<2>, 0U, 0U), + SIMD_TUPLE(v256_unziphi_64, 0U, 0U), SIMD_TUPLE(v256_unziplo_64, 0U, 0U), + SIMD_TUPLE(imm_v256_align<3>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<5>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<6>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<7>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<9>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<10>, 
0U, 0U), + SIMD_TUPLE(imm_v256_align<11>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<13>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<14>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<15>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<16>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<17>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<18>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<19>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<20>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<21>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<22>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<23>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<24>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<25>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<26>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<27>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<28>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<29>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<30>, 0U, 0U), + SIMD_TUPLE(imm_v256_align<31>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128V128), + SIMD_TUPLE(v256_from_v128, 0U, 0U), SIMD_TUPLE(v256_zip_8, 0U, 0U), + SIMD_TUPLE(v256_zip_16, 0U, 0U), SIMD_TUPLE(v256_zip_32, 0U, 0U), + SIMD_TUPLE(v256_mul_s16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128), + SIMD_TUPLE(v256_unpack_u8_s16, 0U, 0U), + SIMD_TUPLE(v256_unpack_s8_s16, 0U, 0U), + SIMD_TUPLE(v256_unpack_u16_s32, 0U, 0U), + SIMD_TUPLE(v256_unpack_s16_s32, 0U, 0U)); + +INSTANTIATE( + ARCH, ARCH_POSTFIX(V256_V256U32), SIMD_TUPLE(v256_shl_8, 7U, 32U), + SIMD_TUPLE(v256_shr_u8, 7U, 32U), SIMD_TUPLE(v256_shr_s8, 7U, 32U), + SIMD_TUPLE(v256_shl_16, 15U, 32U), SIMD_TUPLE(v256_shr_u16, 15U, 32U), + SIMD_TUPLE(v256_shr_s16, 15U, 32U), SIMD_TUPLE(v256_shl_32, 31U, 32U), + SIMD_TUPLE(v256_shr_u32, 31U, 32U), SIMD_TUPLE(v256_shr_s32, 31U, 32U), + SIMD_TUPLE(v256_shl_64, 63U, 32U), SIMD_TUPLE(v256_shr_u64, 63U, 32U), + SIMD_TUPLE(v256_shr_s64, 63U, 32U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256), SIMD_TUPLE(v256_abs_s8, 0U, 0U), + SIMD_TUPLE(v256_abs_s16, 0U, 0U), SIMD_TUPLE(v256_padd_s16, 0U, 0U), + 
SIMD_TUPLE(v256_unpacklo_u8_s16, 0U, 0U), + SIMD_TUPLE(v256_unpacklo_s8_s16, 0U, 0U), + SIMD_TUPLE(v256_unpacklo_u16_s32, 0U, 0U), + SIMD_TUPLE(v256_unpacklo_s16_s32, 0U, 0U), + SIMD_TUPLE(v256_unpackhi_u8_s16, 0U, 0U), + SIMD_TUPLE(v256_unpackhi_s8_s16, 0U, 0U), + SIMD_TUPLE(v256_unpackhi_u16_s32, 0U, 0U), + SIMD_TUPLE(v256_unpackhi_s16_s32, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<2>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<3>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<5>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<6>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<7>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<9>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<10>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<11>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<13>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<14>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<15>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<16>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<17>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<18>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<19>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<20>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<21>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<22>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<23>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<24>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<25>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<26>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<27>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<28>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<29>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<30>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_byte<31>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<2>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<3>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<4>, 0U, 0U), + 
SIMD_TUPLE(imm_v256_shl_n_byte<5>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<6>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<7>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<8>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part2), + SIMD_TUPLE(imm_v256_shl_n_byte<9>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<10>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<11>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<13>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<14>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<15>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<16>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<17>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<18>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<19>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<20>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<21>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<22>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<23>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<24>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<25>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<26>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<27>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<28>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<29>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<30>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_byte<31>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_8<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_8<2>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_8<3>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_8<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_8<5>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_8<6>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_8<7>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u8<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u8<2>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u8<3>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u8<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u8<5>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u8<6>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u8<7>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s8<1>, 0U, 0U), + 
SIMD_TUPLE(imm_v256_shr_n_s8<2>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s8<3>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s8<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s8<5>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s8<6>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s8<7>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_16<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_16<2>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_16<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_16<6>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_16<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_16<10>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part3), + SIMD_TUPLE(imm_v256_shl_n_16<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_16<14>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u16<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u16<2>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u16<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u16<6>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u16<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u16<10>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u16<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u16<14>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s16<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s16<2>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s16<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s16<6>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s16<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s16<10>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s16<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s16<14>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_32<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_32<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_32<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_32<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_32<16>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_32<20>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_32<24>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_32<28>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u32<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u32<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u32<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u32<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u32<16>, 0U, 0U), + 
SIMD_TUPLE(imm_v256_shr_n_u32<20>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u32<24>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u32<28>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s32<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s32<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s32<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s32<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s32<16>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s32<20>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s32<24>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s32<28>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part4), + SIMD_TUPLE(imm_v256_shl_n_64<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<16>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<20>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<24>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<28>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<32>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<36>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<40>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<44>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<48>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<52>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<56>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_64<60>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<16>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<20>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<24>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<28>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<32>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<36>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<40>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<44>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<48>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<52>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<56>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_u64<60>, 0U, 
0U), + SIMD_TUPLE(imm_v256_shr_n_s64<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<16>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<20>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<24>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<28>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<32>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<36>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<40>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<44>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<48>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<52>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<56>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_s64<60>, 0U, 0U), + SIMD_TUPLE(v256_padd_u8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part5), + SIMD_TUPLE(imm_v256_shr_n_word<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<2>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<3>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<5>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<6>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<7>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<9>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<10>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<11>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<13>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<14>, 0U, 0U), + SIMD_TUPLE(imm_v256_shr_n_word<15>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<1>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<2>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<3>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<4>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<5>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<6>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<7>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<8>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<9>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<10>, 0U, 0U), + 
SIMD_TUPLE(imm_v256_shl_n_word<11>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<12>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<13>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<14>, 0U, 0U), + SIMD_TUPLE(imm_v256_shl_n_word<15>, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256V256V256), + SIMD_TUPLE(v256_blend_8, 0U, 0U), + SIMD_TUPLE(v256_wideshuffle_8, 63U, 8U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U8), SIMD_TUPLE(v256_dup_8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U16), SIMD_TUPLE(v256_dup_16, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U32), SIMD_TUPLE(v256_dup_32, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U64), SIMD_TUPLE(v256_dup_64, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256), SIMD_TUPLE(v256_low_u32, 0U, 0U), + SIMD_TUPLE(v256_movemask_8, 0U, 0U)); + +INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V256), SIMD_TUPLE(v256_low_v64, 0U, 0U)); + +} // namespace SIMD_NAMESPACE diff --git a/third_party/aom/test/simd_sse2_test.cc b/third_party/aom/test/simd_sse2_test.cc new file mode 100644 index 0000000000..b37a931b38 --- /dev/null +++ b/third_party/aom/test/simd_sse2_test.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
 */

// Instantiates the generic SIMD intrinsics tests for SSE2 by defining the
// three configuration macros and then including the shared implementation
// header. The INSTANTIATE tables use ARCH and ARCH_POSTFIX, and the enclosing
// namespace is named by SIMD_NAMESPACE.
// NOTE(review): the whole translation unit is compiled out unless the build
// is optimized (__OPTIMIZE__ defined and non-zero) or, for non-GNU compilers,
// _DEBUG is not defined -- presumably because these tests are impractical in
// unoptimized builds; confirm the intent.
#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
    (!defined(__GNUC__) && !defined(_DEBUG))
#define ARCH SSE2
#define ARCH_POSTFIX(name) name##_sse2
#define SIMD_NAMESPACE simd_test_sse2
#include "test/simd_impl.h"
#endif
diff --git a/third_party/aom/test/simd_sse4_test.cc b/third_party/aom/test/simd_sse4_test.cc new file mode 100644 index 0000000000..b1c9d5cd88 --- /dev/null +++ b/third_party/aom/test/simd_sse4_test.cc @@ -0,0 +1,18 @@
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

// Same pattern as the other per-ISA SIMD test files, configured for SSE4.1:
// the macros select the architecture tag, the test-name suffix and the
// namespace used by the shared implementation header.
// NOTE(review): only built in optimized / non-debug configurations (see the
// guard below) -- confirm the rationale.
#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
    (!defined(__GNUC__) && !defined(_DEBUG))
#define ARCH SSE4_1
#define ARCH_POSTFIX(name) name##_sse4_1
#define SIMD_NAMESPACE simd_test_sse4_1
#include "test/simd_impl.h"
#endif
diff --git a/third_party/aom/test/simd_ssse3_test.cc b/third_party/aom/test/simd_ssse3_test.cc new file mode 100644 index 0000000000..d95c26fb5e --- /dev/null +++ b/third_party/aom/test/simd_ssse3_test.cc @@ -0,0 +1,18 @@
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */ + +#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \ + (!defined(__GNUC__) && !defined(_DEBUG)) +#define ARCH SSSE3 +#define ARCH_POSTFIX(name) name##_ssse3 +#define SIMD_NAMESPACE simd_test_ssse3 +#include "test/simd_impl.h" +#endif diff --git a/third_party/aom/test/simple_decoder.sh b/third_party/aom/test/simple_decoder.sh new file mode 100755 index 0000000000..9b1aea1ed5 --- /dev/null +++ b/third_party/aom/test/simple_decoder.sh @@ -0,0 +1,58 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests the libaom simple_decoder example code. To add new tests to +## this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to simple_decoder_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: Make sure input is available: +simple_decoder_verify_environment() { + if [ ! "$(av1_encode_available)" = "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then + return 1 + fi +} + +# Runs simple_decoder using $1 as input file. $2 is the codec name, and is used +# solely to name the output file. +simple_decoder() { + local decoder="$(aom_tool_path simple_decoder)" + local input_file="$1" + local codec="$2" + local output_file="${AOM_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw" + + if [ ! -x "${decoder}" ]; then + elog "${decoder} does not exist or is not executable." 
+ return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \ + ${devnull} || return 1 + + [ -e "${output_file}" ] || return 1 +} + +simple_decoder_av1() { + if [ "$(av1_decode_available)" = "yes" ]; then + if [ ! -e "${AV1_IVF_FILE}" ]; then + local file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf" + encode_yuv_raw_input_av1 "${file}" --ivf + simple_decoder "${file}" av1 || return 1 + else + simple_decoder "${AV1_IVF_FILE}" av1 || return 1 + fi + fi +} + +simple_decoder_tests="simple_decoder_av1" + +run_tests simple_decoder_verify_environment "${simple_decoder_tests}" diff --git a/third_party/aom/test/simple_encoder.sh b/third_party/aom/test/simple_encoder.sh new file mode 100755 index 0000000000..dfb1a1b546 --- /dev/null +++ b/third_party/aom/test/simple_encoder.sh @@ -0,0 +1,53 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests the libaom simple_encoder example. To add new tests to this +## file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to simple_encoder_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required. +simple_encoder_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH." + return 1 + fi +} + +# Runs simple_encoder using the codec specified by $1 with a frame limit of 100. 
+simple_encoder() { + local encoder="${LIBAOM_BIN_PATH}/simple_encoder${AOM_TEST_EXE_SUFFIX}" + local codec="$1" + local output_file="${AOM_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf" + + if [ ! -x "${encoder}" ]; then + elog "${encoder} does not exist or is not executable." + return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ + "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 0 5 \ + ${devnull} || return 1 + + [ -e "${output_file}" ] || return 1 +} + + +simple_encoder_av1() { + if [ "$(av1_encode_available)" = "yes" ]; then + simple_encoder av1 || return 1 + fi +} + +simple_encoder_tests="simple_encoder_av1" + +run_tests simple_encoder_verify_environment "${simple_encoder_tests}" diff --git a/third_party/aom/test/sse_sum_test.cc b/third_party/aom/test/sse_sum_test.cc new file mode 100644 index 0000000000..fd6fb886d3 --- /dev/null +++ b/third_party/aom/test/sse_sum_test.cc @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2020, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <cmath> +#include <cstdlib> +#include <string> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom_ports/mem.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "test/function_equivalence_test.h" + +using libaom_test::ACMRandom; +using libaom_test::FunctionEquivalenceTest; +using ::testing::Combine; +using ::testing::Range; +using ::testing::Values; +using ::testing::ValuesIn; + +namespace { +const int kNumIterations = 10000; + +typedef uint64_t (*SSI16Func)(const int16_t *src, int src_stride, int width, + int height, int *sum); +typedef libaom_test::FuncParam<SSI16Func> TestFuncs; + +class SumSSETest : public ::testing::TestWithParam<TestFuncs> { + public: + ~SumSSETest() override = default; + void SetUp() override { + params_ = this->GetParam(); + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2)); + ASSERT_NE(src_, nullptr); + } + + void TearDown() override { aom_free(src_); } + void RunTest(int isRandom); + void RunSpeedTest(); + + void GenRandomData(int width, int height, int stride) { + const int msb = 11; // Up to 12 bit input + const int limit = 1 << (msb + 1); + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + src_[ii * stride + jj] = rnd_(2) ? rnd_(limit) : -rnd_(limit); + } + } + } + + void GenExtremeData(int width, int height, int stride) { + const int msb = 11; // Up to 12 bit input + const int limit = 1 << (msb + 1); + const int val = rnd_(2) ? 
limit - 1 : -(limit - 1); + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + src_[ii * stride + jj] = val; + } + } + } + + protected: + TestFuncs params_; + int16_t *src_; + ACMRandom rnd_; +}; + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSSETest); + +void SumSSETest::RunTest(int isRandom) { + for (int k = 0; k < kNumIterations; k++) { + const int width = 4 * (rnd_(31) + 1); // Up to 128x128 + const int height = 4 * (rnd_(31) + 1); // Up to 128x128 + int stride = 4 << rnd_(7); // Up to 256 stride + while (stride < width) { // Make sure it's valid + stride = 4 << rnd_(7); + } + if (isRandom) { + GenRandomData(width, height, stride); + } else { + GenExtremeData(width, height, stride); + } + int sum_ref = 0, sum_tst = 0; + const uint64_t sse_ref = + params_.ref_func(src_, stride, width, height, &sum_ref); + const uint64_t sse_tst = + params_.tst_func(src_, stride, width, height, &sum_tst); + + EXPECT_EQ(sse_ref, sse_tst) + << "Error: SumSSETest [" << width << "x" << height + << "] C SSE does not match optimized output."; + EXPECT_EQ(sum_ref, sum_tst) + << "Error: SumSSETest [" << width << "x" << height + << "] C Sum does not match optimized output."; + } +} + +void SumSSETest::RunSpeedTest() { + for (int block = BLOCK_4X4; block < BLOCK_SIZES_ALL; block++) { + const int width = block_size_wide[block]; // Up to 128x128 + const int height = block_size_high[block]; // Up to 128x128 + int stride = 4 << rnd_(7); // Up to 256 stride + while (stride < width) { // Make sure it's valid + stride = 4 << rnd_(7); + } + GenExtremeData(width, height, stride); + const int num_loops = 1000000000 / (width + height); + int sum_ref = 0, sum_tst = 0; + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + + for (int i = 0; i < num_loops; ++i) + params_.ref_func(src_, stride, width, height, &sum_ref); + + aom_usec_timer_mark(&timer); + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + printf("SumSquaresTest C %3dx%-3d: 
%7.2f ns\n", width, height, + 1000.0 * elapsed_time / num_loops); + + aom_usec_timer timer1; + aom_usec_timer_start(&timer1); + for (int i = 0; i < num_loops; ++i) + params_.tst_func(src_, stride, width, height, &sum_tst); + aom_usec_timer_mark(&timer1); + const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); + printf("SumSquaresTest Test %3dx%-3d: %7.2f ns\n", width, height, + 1000.0 * elapsed_time1 / num_loops); + } +} + +TEST_P(SumSSETest, OperationCheck) { + RunTest(1); // GenRandomData +} + +TEST_P(SumSSETest, ExtremeValues) { + RunTest(0); // GenExtremeData +} + +TEST_P(SumSSETest, DISABLED_Speed) { RunSpeedTest(); } + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, SumSSETest, + ::testing::Values(TestFuncs( + &aom_sum_sse_2d_i16_c, &aom_sum_sse_2d_i16_sse2))); + +#endif // HAVE_SSE2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, SumSSETest, + ::testing::Values(TestFuncs( + &aom_sum_sse_2d_i16_c, &aom_sum_sse_2d_i16_neon))); +#endif // HAVE_NEON + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, SumSSETest, + ::testing::Values(TestFuncs( + &aom_sum_sse_2d_i16_c, &aom_sum_sse_2d_i16_avx2))); +#endif // HAVE_AVX2 + +#if HAVE_SVE +INSTANTIATE_TEST_SUITE_P(SVE, SumSSETest, + ::testing::Values(TestFuncs(&aom_sum_sse_2d_i16_c, + &aom_sum_sse_2d_i16_sve))); +#endif // HAVE_SVE + +} // namespace diff --git a/third_party/aom/test/still_picture_test.cc b/third_party/aom/test/still_picture_test.cc new file mode 100644 index 0000000000..3dfb1c8693 --- /dev/null +++ b/third_party/aom/test/still_picture_test.cc @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2020, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +// This class is used to test the presence of still picture feature. +class StillPicturePresenceTest + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>, + public ::libaom_test::EncoderTest { + protected: + StillPicturePresenceTest() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + enable_full_header_(GET_PARAM(2)) { + still_picture_coding_violated_ = false; + } + ~StillPicturePresenceTest() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = AOM_Q; + cfg_.g_threads = 1; + cfg_.full_still_picture_hdr = enable_full_header_; + cfg_.g_limit = 1; + } + + bool DoDecode() const override { return true; } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 5); + encoder->Control(AV1E_SET_FORCE_VIDEO_MODE, 0); + } + } + + bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder *decoder) override { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + if (AOM_CODEC_OK == res_dec) { + aom_codec_ctx_t *ctx_dec = decoder->GetDecoder(); + AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_STILL_PICTURE, + &still_pic_info_); + if (still_pic_info_.is_still_picture != 1) { + still_picture_coding_violated_ = true; + } + if (still_pic_info_.is_reduced_still_picture_hdr == enable_full_header_) { + /* If full_still_picture_header is enabled in encoder config but + * bitstream 
contains reduced_still_picture_header set, then set + * still_picture_coding_violated_ to true. + * Similarly, if full_still_picture_header is disabled in encoder config + * but bitstream contains reduced_still_picture_header not set, then set + * still_picture_coding_violated_ to true. + */ + still_picture_coding_violated_ = true; + } + } + return AOM_CODEC_OK == res_dec; + } + + ::libaom_test::TestMode encoding_mode_; + bool still_picture_coding_violated_; + int enable_full_header_; + aom_still_picture_info still_pic_info_; + aom_rc_mode end_usage_check_; +}; + +TEST_P(StillPicturePresenceTest, StillPictureEncodePresenceTest) { + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, 1); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_EQ(still_picture_coding_violated_, false); +} + +AV1_INSTANTIATE_TEST_SUITE(StillPicturePresenceTest, + ::testing::Values(::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood), + ::testing::Values(1, 0)); +} // namespace diff --git a/third_party/aom/test/subtract_test.cc b/third_party/aom/test/subtract_test.cc new file mode 100644 index 0000000000..e591e6543d --- /dev/null +++ b/third_party/aom/test/subtract_test.cc @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <cstdint> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "av1/common/blockd.h" +#include "aom_mem/aom_mem.h" +#include "aom_ports/mem.h" + +typedef void (*SubtractFunc)(int rows, int cols, int16_t *diff_ptr, + ptrdiff_t diff_stride, const uint8_t *src_ptr, + ptrdiff_t src_stride, const uint8_t *pred_ptr, + ptrdiff_t pred_stride); + +namespace { + +using std::get; +using std::make_tuple; +using std::tuple; + +using libaom_test::ACMRandom; + +// <BLOCK_SIZE, optimized subtract func, reference subtract func> +using Params = tuple<BLOCK_SIZE, SubtractFunc, SubtractFunc>; + +class AV1SubtractBlockTestBase : public ::testing::Test { + public: + AV1SubtractBlockTestBase(BLOCK_SIZE bs, int bit_depth, SubtractFunc func, + SubtractFunc ref_func) { + block_width_ = block_size_wide[bs]; + block_height_ = block_size_high[bs]; + func_ = func; + ref_func_ = ref_func; + if (bit_depth == -1) { + hbd_ = false; + bit_depth_ = AOM_BITS_8; + } else { + hbd_ = true; + bit_depth_ = static_cast<aom_bit_depth_t>(bit_depth); + } + } + + void SetUp() override { + rnd_.Reset(ACMRandom::DeterministicSeed()); + + const size_t max_width = 128; + const size_t max_block_size = max_width * max_width; + if (hbd_) { + src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>( + aom_memalign(16, max_block_size * sizeof(uint16_t)))); + ASSERT_NE(src_, nullptr); + pred_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>( + aom_memalign(16, max_block_size * sizeof(uint16_t)))); + ASSERT_NE(pred_, nullptr); + } else { + src_ = reinterpret_cast<uint8_t *>( + aom_memalign(16, max_block_size * sizeof(uint8_t))); + ASSERT_NE(src_, nullptr); + pred_ = reinterpret_cast<uint8_t *>( + aom_memalign(16, max_block_size * sizeof(uint8_t))); + ASSERT_NE(pred_, nullptr); + } + diff_ = 
reinterpret_cast<int16_t *>( + aom_memalign(32, max_block_size * sizeof(int16_t))); + ASSERT_NE(diff_, nullptr); + } + + void TearDown() override { + if (hbd_) { + aom_free(CONVERT_TO_SHORTPTR(src_)); + aom_free(CONVERT_TO_SHORTPTR(pred_)); + } else { + aom_free(src_); + aom_free(pred_); + } + aom_free(diff_); + } + + protected: + void CheckResult(); + void RunForSpeed(); + + private: + void FillInputs(); + + ACMRandom rnd_; + int block_height_; + int block_width_; + bool hbd_; + aom_bit_depth_t bit_depth_; + SubtractFunc func_; + SubtractFunc ref_func_; + uint8_t *src_; + uint8_t *pred_; + int16_t *diff_; +}; + +// Refills all 128 * 128 entries of the source and prediction buffers with +// random samples; high-bitdepth samples are masked to bit_depth_ bits. +// SetUp() already asserts that both buffers are non-null. +void AV1SubtractBlockTestBase::FillInputs() { + const size_t max_width = 128; + const int max_block_size = max_width * max_width; + if (hbd_) { + const int mask = (1 << bit_depth_) - 1; + for (int i = 0; i < max_block_size; ++i) { + CONVERT_TO_SHORTPTR(src_)[i] = rnd_.Rand16() & mask; + CONVERT_TO_SHORTPTR(pred_)[i] = rnd_.Rand16() & mask; + } + } else { + for (int i = 0; i < max_block_size; ++i) { + src_[i] = rnd_.Rand8(); + pred_[i] = rnd_.Rand8(); + } + } +} + +void AV1SubtractBlockTestBase::CheckResult() { + const int test_num = 100; + int i; + + for (i = 0; i < test_num; ++i) { + FillInputs(); + + func_(block_height_, block_width_, diff_, block_width_, src_, block_width_, + pred_, block_width_); + + if (hbd_) + for (int r = 0; r < block_height_; ++r) { + for (int c = 0; c < block_width_; ++c) { + EXPECT_EQ(diff_[r * block_width_ + c], + (CONVERT_TO_SHORTPTR(src_)[r * block_width_ + c] - + CONVERT_TO_SHORTPTR(pred_)[r * block_width_ + c])) + << "r = " << r << ", c = " << c << ", test: " << i; + } + } + else { + for (int r = 0; r < block_height_; ++r) { + for (int c = 0; c < block_width_; ++c) { + EXPECT_EQ(diff_[r * block_width_ + c], + src_[r * block_width_ + c] - pred_[r * block_width_ + c]) + << "r = " << r << ", c = " << c << ", test: " << i; + } + } + } + } +} + +void 
AV1SubtractBlockTestBase::RunForSpeed() { + const int test_num = 200000; + int i; + + if (ref_func_ == func_) GTEST_SKIP(); + + FillInputs(); + + aom_usec_timer ref_timer; + aom_usec_timer_start(&ref_timer); + for (i = 0; i < test_num; ++i) { + ref_func_(block_height_, block_width_, diff_, block_width_, src_, + block_width_, pred_, block_width_); + } + aom_usec_timer_mark(&ref_timer); + const int64_t ref_elapsed_time = aom_usec_timer_elapsed(&ref_timer); + + FillInputs(); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (i = 0; i < test_num; ++i) { + func_(block_height_, block_width_, diff_, block_width_, src_, block_width_, + pred_, block_width_); + } + aom_usec_timer_mark(&timer); + const int64_t elapsed_time = aom_usec_timer_elapsed(&timer); + + printf( + "[%dx%d]: " + "ref_time=%6" PRId64 " \t simd_time=%6" PRId64 + " \t " + "gain=%f \n", + block_width_, block_height_, ref_elapsed_time, elapsed_time, + static_cast<double>(ref_elapsed_time) / + static_cast<double>(elapsed_time)); +} + +class AV1SubtractBlockTest : public ::testing::WithParamInterface<Params>, + public AV1SubtractBlockTestBase { + public: + AV1SubtractBlockTest() + : AV1SubtractBlockTestBase(GET_PARAM(0), -1, GET_PARAM(1), GET_PARAM(2)) { + } +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1SubtractBlockTest); + +TEST_P(AV1SubtractBlockTest, CheckResult) { CheckResult(); } +TEST_P(AV1SubtractBlockTest, DISABLED_Speed) { RunForSpeed(); } + +const BLOCK_SIZE kValidBlockSize[] = { BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, + BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, + BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, + BLOCK_64X64, BLOCK_64X128, BLOCK_128X64, + BLOCK_128X128 }; + +INSTANTIATE_TEST_SUITE_P( + C, AV1SubtractBlockTest, + ::testing::Combine(::testing::ValuesIn(kValidBlockSize), + ::testing::Values(&aom_subtract_block_c), + ::testing::Values(&aom_subtract_block_c))); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, AV1SubtractBlockTest, + 
::testing::Combine(::testing::ValuesIn(kValidBlockSize), + ::testing::Values(&aom_subtract_block_sse2), + ::testing::Values(&aom_subtract_block_c))); +#endif +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1SubtractBlockTest, + ::testing::Combine(::testing::ValuesIn(kValidBlockSize), + ::testing::Values(&aom_subtract_block_avx2), + ::testing::Values(&aom_subtract_block_c))); + +#endif +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1SubtractBlockTest, + ::testing::Combine(::testing::ValuesIn(kValidBlockSize), + ::testing::Values(&aom_subtract_block_neon), + ::testing::Values(&aom_subtract_block_c))); + +#endif + +#if CONFIG_AV1_HIGHBITDEPTH + +// <BLOCK_SIZE, bit_depth, optimized subtract func, reference subtract func> +using ParamsHBD = tuple<BLOCK_SIZE, int, SubtractFunc, SubtractFunc>; + +class AV1HBDSubtractBlockTest : public ::testing::WithParamInterface<ParamsHBD>, + public AV1SubtractBlockTestBase { + public: + AV1HBDSubtractBlockTest() + : AV1SubtractBlockTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2), + GET_PARAM(3)) {} +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HBDSubtractBlockTest); + +INSTANTIATE_TEST_SUITE_P( + C, AV1HBDSubtractBlockTest, + ::testing::Combine(::testing::ValuesIn(kValidBlockSize), + ::testing::Values(12), + ::testing::Values(&aom_highbd_subtract_block_c), + ::testing::Values(&aom_highbd_subtract_block_c))); + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, AV1HBDSubtractBlockTest, + ::testing::Combine(::testing::ValuesIn(kValidBlockSize), + ::testing::Values(12), + ::testing::Values(&aom_highbd_subtract_block_sse2), + ::testing::Values(&aom_highbd_subtract_block_c))); +#endif // HAVE_SSE2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1HBDSubtractBlockTest, + ::testing::Combine(::testing::ValuesIn(kValidBlockSize), + ::testing::Values(12), + ::testing::Values(&aom_highbd_subtract_block_neon), + ::testing::Values(&aom_highbd_subtract_block_c))); +#endif +#endif // CONFIG_AV1_HIGHBITDEPTH +} // namespace 
diff --git a/third_party/aom/test/sum_squares_test.cc b/third_party/aom/test/sum_squares_test.cc new file mode 100644 index 0000000000..7b98ced523 --- /dev/null +++ b/third_party/aom/test/sum_squares_test.cc @@ -0,0 +1,928 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <cmath> +#include <cstdlib> +#include <string> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom_ports/mem.h" +#include "av1/common/common_data.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "test/function_equivalence_test.h" + +using libaom_test::ACMRandom; +using libaom_test::FunctionEquivalenceTest; +using ::testing::Combine; +using ::testing::Range; +using ::testing::Values; +using ::testing::ValuesIn; + +namespace { +const int kNumIterations = 10000; + +static const int16_t kInt13Max = (1 << 12) - 1; + +typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int width, + int height); +typedef libaom_test::FuncParam<SSI16Func> TestFuncs; + +class SumSquaresTest : public ::testing::TestWithParam<TestFuncs> { + public: + ~SumSquaresTest() override = default; + void SetUp() override { + params_ = this->GetParam(); + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2)); + ASSERT_NE(src_, nullptr); + } + + void TearDown() override { aom_free(src_); } + 
void RunTest(bool is_random); + void RunSpeedTest(); + + void GenRandomData(int width, int height, int stride) { + const int msb = 11; // Up to 12 bit input + const int limit = 1 << (msb + 1); + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + src_[ii * stride + jj] = rnd_(2) ? rnd_(limit) : -rnd_(limit); + } + } + } + + void GenExtremeData(int width, int height, int stride) { + const int msb = 11; // Up to 12 bit input + const int limit = 1 << (msb + 1); + const int val = rnd_(2) ? limit - 1 : -(limit - 1); + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + src_[ii * stride + jj] = val; + } + } + } + + protected: + TestFuncs params_; + int16_t *src_; + ACMRandom rnd_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquaresTest); + +void SumSquaresTest::RunTest(bool is_random) { + int failed = 0; + for (int k = 0; k < kNumIterations; k++) { + const int width = 4 * (rnd_(31) + 1); // Up to 128x128 + const int height = 4 * (rnd_(31) + 1); // Up to 128x128 + int stride = 4 << rnd_(7); // Up to 256 stride + while (stride < width) { // Make sure it's valid + stride = 4 << rnd_(7); + } + if (is_random) { + GenRandomData(width, height, stride); + } else { + GenExtremeData(width, height, stride); + } + const uint64_t res_ref = params_.ref_func(src_, stride, width, height); + uint64_t res_tst; + API_REGISTER_STATE_CHECK(res_tst = + params_.tst_func(src_, stride, width, height)); + + if (!failed) { + failed = res_ref != res_tst; + EXPECT_EQ(res_ref, res_tst) + << "Error: Sum Squares Test [" << width << "x" << height + << "] C output does not match optimized output."; + } + } +} + +void SumSquaresTest::RunSpeedTest() { + for (int block = BLOCK_4X4; block < BLOCK_SIZES_ALL; block++) { + const int width = block_size_wide[block]; // Up to 128x128 + const int height = block_size_high[block]; // Up to 128x128 + int stride = 4 << rnd_(7); // Up to 256 stride + while (stride < width) { // Make sure it's valid + stride 
= 4 << rnd_(7); + } + GenExtremeData(width, height, stride); + const int num_loops = 1000000000 / (width + height); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + + for (int i = 0; i < num_loops; ++i) + params_.ref_func(src_, stride, width, height); + + aom_usec_timer_mark(&timer); + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + printf("SumSquaresTest C %3dx%-3d: %7.2f ns\n", width, height, + 1000.0 * elapsed_time / num_loops); + + aom_usec_timer timer1; + aom_usec_timer_start(&timer1); + for (int i = 0; i < num_loops; ++i) + params_.tst_func(src_, stride, width, height); + aom_usec_timer_mark(&timer1); + const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); + printf("SumSquaresTest Test %3dx%-3d: %7.2f ns\n", width, height, + 1000.0 * elapsed_time1 / num_loops); + } +} + +TEST_P(SumSquaresTest, OperationCheck) { + RunTest(true); // GenRandomData +} + +TEST_P(SumSquaresTest, ExtremeValues) { + RunTest(false); // GenExtremeData +} + +TEST_P(SumSquaresTest, DISABLED_Speed) { RunSpeedTest(); } + +#if HAVE_SSE2 + +INSTANTIATE_TEST_SUITE_P( + SSE2, SumSquaresTest, + ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c, + &aom_sum_squares_2d_i16_sse2))); + +#endif // HAVE_SSE2 + +#if HAVE_NEON + +INSTANTIATE_TEST_SUITE_P( + NEON, SumSquaresTest, + ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c, + &aom_sum_squares_2d_i16_neon))); + +#endif // HAVE_NEON + +#if HAVE_SVE +INSTANTIATE_TEST_SUITE_P( + SVE, SumSquaresTest, + ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c, + &aom_sum_squares_2d_i16_sve))); + +#endif // HAVE_SVE + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, SumSquaresTest, + ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c, + &aom_sum_squares_2d_i16_avx2))); +#endif // HAVE_AVX2 + +////////////////////////////////////////////////////////////////////////////// +// 1D version +////////////////////////////////////////////////////////////////////////////// + +typedef uint64_t 
(*F1D)(const int16_t *src, uint32_t n); +typedef libaom_test::FuncParam<F1D> TestFuncs1D; + +class SumSquares1DTest : public FunctionEquivalenceTest<F1D> { + protected: + static const int kIterations = 1000; + static const int kMaxSize = 256; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquares1DTest); + +TEST_P(SumSquares1DTest, RandomValues) { + DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + for (int i = 0; i < kMaxSize * kMaxSize; ++i) + src[i] = rng_(kInt13Max * 2 + 1) - kInt13Max; + + // Block size is between 64 and 128 * 128 and is always a multiple of 64. + const int n = (rng_(255) + 1) * 64; + + const uint64_t ref_res = params_.ref_func(src, n); + uint64_t tst_res; + API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, n)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +TEST_P(SumSquares1DTest, ExtremeValues) { + DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]); + + for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { + if (rng_(2)) { + for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = kInt13Max; + } else { + for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = -kInt13Max; + } + + // Block size is between 64 and 128 * 128 and is always a multiple of 64. 
+ const int n = (rng_(255) + 1) * 64; + + const uint64_t ref_res = params_.ref_func(src, n); + uint64_t tst_res; + API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, n)); + + ASSERT_EQ(ref_res, tst_res); + } +} + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P(SSE2, SumSquares1DTest, + ::testing::Values(TestFuncs1D( + aom_sum_squares_i16_c, aom_sum_squares_i16_sse2))); + +#endif // HAVE_SSE2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, SumSquares1DTest, + ::testing::Values(TestFuncs1D( + aom_sum_squares_i16_c, aom_sum_squares_i16_neon))); + +#endif // HAVE_NEON + +#if HAVE_SVE +INSTANTIATE_TEST_SUITE_P(SVE, SumSquares1DTest, + ::testing::Values(TestFuncs1D( + aom_sum_squares_i16_c, aom_sum_squares_i16_sve))); + +#endif // HAVE_SVE + +typedef int64_t (*SSEFunc)(const uint8_t *a, int a_stride, const uint8_t *b, + int b_stride, int width, int height); +typedef libaom_test::FuncParam<SSEFunc> TestSSEFuncs; + +typedef std::tuple<TestSSEFuncs, int> SSETestParam; + +class SSETest : public ::testing::TestWithParam<SSETestParam> { + public: + ~SSETest() override = default; + void SetUp() override { + params_ = GET_PARAM(0); + width_ = GET_PARAM(1); + is_hbd_ = +#if CONFIG_AV1_HIGHBITDEPTH + params_.ref_func == aom_highbd_sse_c; +#else + false; +#endif + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2)); + ref_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2)); + ASSERT_NE(src_, nullptr); + ASSERT_NE(ref_, nullptr); + } + + void TearDown() override { + aom_free(src_); + aom_free(ref_); + } + void RunTest(bool is_random, int width, int height, int run_times); + + void GenRandomData(int width, int height, int stride) { + uint16_t *src16 = reinterpret_cast<uint16_t *>(src_); + uint16_t *ref16 = reinterpret_cast<uint16_t *>(ref_); + const int msb = 11; // Up to 12 bit input + const int limit = 1 << (msb + 1); + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + if 
(!is_hbd_) { + src_[ii * stride + jj] = rnd_.Rand8(); + ref_[ii * stride + jj] = rnd_.Rand8(); + } else { + src16[ii * stride + jj] = rnd_(limit); + ref16[ii * stride + jj] = rnd_(limit); + } + } + } + } + + void GenExtremeData(int width, int height, int stride, uint8_t *data, + int16_t val) { + uint16_t *data16 = reinterpret_cast<uint16_t *>(data); + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + if (!is_hbd_) { + data[ii * stride + jj] = static_cast<uint8_t>(val); + } else { + data16[ii * stride + jj] = val; + } + } + } + } + + protected: + bool is_hbd_; + int width_; + TestSSEFuncs params_; + uint8_t *src_; + uint8_t *ref_; + ACMRandom rnd_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSETest); + +void SSETest::RunTest(bool is_random, int width, int height, int run_times) { + int failed = 0; + aom_usec_timer ref_timer, test_timer; + for (int k = 0; k < 3; k++) { + int stride = 4 << rnd_(7); // Up to 256 stride + while (stride < width) { // Make sure it's valid + stride = 4 << rnd_(7); + } + if (is_random) { + GenRandomData(width, height, stride); + } else { + const int msb = is_hbd_ ? 
12 : 8; // Up to 12 bit input + const int limit = (1 << msb) - 1; + if (k == 0) { + GenExtremeData(width, height, stride, src_, 0); + GenExtremeData(width, height, stride, ref_, limit); + } else { + GenExtremeData(width, height, stride, src_, limit); + GenExtremeData(width, height, stride, ref_, 0); + } + } + int64_t res_ref, res_tst; + uint8_t *src = src_; + uint8_t *ref = ref_; + if (is_hbd_) { + src = CONVERT_TO_BYTEPTR(src_); + ref = CONVERT_TO_BYTEPTR(ref_); + } + res_ref = params_.ref_func(src, stride, ref, stride, width, height); + res_tst = params_.tst_func(src, stride, ref, stride, width, height); + if (run_times > 1) { + aom_usec_timer_start(&ref_timer); + for (int j = 0; j < run_times; j++) { + params_.ref_func(src, stride, ref, stride, width, height); + } + aom_usec_timer_mark(&ref_timer); + const int elapsed_time_c = + static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int j = 0; j < run_times; j++) { + params_.tst_func(src, stride, ref, stride, width, height); + } + aom_usec_timer_mark(&test_timer); + const int elapsed_time_simd = + static_cast<int>(aom_usec_timer_elapsed(&test_timer)); + + printf( + "c_time=%d \t simd_time=%d \t " + "gain=%d\n", + elapsed_time_c, elapsed_time_simd, + (elapsed_time_c / elapsed_time_simd)); + } else { + if (!failed) { + failed = res_ref != res_tst; + EXPECT_EQ(res_ref, res_tst) + << "Error:" << (is_hbd_ ? 
"hbd " : " ") << k << " SSE Test [" + << width << "x" << height + << "] C output does not match optimized output."; + } + } + } +} + +TEST_P(SSETest, OperationCheck) { + for (int height = 4; height <= 128; height += 4) { + RunTest(true, width_, height, 1); // GenRandomData + } +} + +TEST_P(SSETest, ExtremeValues) { + for (int height = 4; height <= 128; height += 4) { + RunTest(false, width_, height, 1); + } +} + +TEST_P(SSETest, DISABLED_Speed) { + for (int height = 4; height <= 128; height += 4) { + RunTest(true, width_, height, 100); + } +} + +#if HAVE_NEON +TestSSEFuncs sse_neon[] = { + TestSSEFuncs(&aom_sse_c, &aom_sse_neon), +#if CONFIG_AV1_HIGHBITDEPTH + TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_neon) +#endif +}; +INSTANTIATE_TEST_SUITE_P(NEON, SSETest, + Combine(ValuesIn(sse_neon), Range(4, 129, 4))); +#endif // HAVE_NEON + +#if HAVE_NEON_DOTPROD +TestSSEFuncs sse_neon_dotprod[] = { + TestSSEFuncs(&aom_sse_c, &aom_sse_neon_dotprod), +}; +INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SSETest, + Combine(ValuesIn(sse_neon_dotprod), Range(4, 129, 4))); +#endif // HAVE_NEON_DOTPROD + +#if HAVE_SSE4_1 +TestSSEFuncs sse_sse4[] = { + TestSSEFuncs(&aom_sse_c, &aom_sse_sse4_1), +#if CONFIG_AV1_HIGHBITDEPTH + TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_sse4_1) +#endif +}; +INSTANTIATE_TEST_SUITE_P(SSE4_1, SSETest, + Combine(ValuesIn(sse_sse4), Range(4, 129, 4))); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 + +TestSSEFuncs sse_avx2[] = { + TestSSEFuncs(&aom_sse_c, &aom_sse_avx2), +#if CONFIG_AV1_HIGHBITDEPTH + TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_avx2) +#endif +}; +INSTANTIATE_TEST_SUITE_P(AVX2, SSETest, + Combine(ValuesIn(sse_avx2), Range(4, 129, 4))); +#endif // HAVE_AVX2 + +#if HAVE_SVE +#if CONFIG_AV1_HIGHBITDEPTH +TestSSEFuncs sse_sve[] = { TestSSEFuncs(&aom_highbd_sse_c, + &aom_highbd_sse_sve) }; +INSTANTIATE_TEST_SUITE_P(SVE, SSETest, + Combine(ValuesIn(sse_sve), Range(4, 129, 4))); +#endif +#endif // HAVE_SVE + 
+////////////////////////////////////////////////////////////////////////////// +// get_blk sum squares test functions +////////////////////////////////////////////////////////////////////////////// + +typedef void (*sse_sum_func)(const int16_t *data, int stride, int bw, int bh, + int *x_sum, int64_t *x2_sum); +typedef libaom_test::FuncParam<sse_sum_func> TestSSE_SumFuncs; + +typedef std::tuple<TestSSE_SumFuncs, TX_SIZE> SSE_SumTestParam; + +class SSE_Sum_Test : public ::testing::TestWithParam<SSE_SumTestParam> { + public: + ~SSE_Sum_Test() override = default; + void SetUp() override { + params_ = GET_PARAM(0); + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<int16_t *>(aom_memalign(32, 256 * 256 * 2)); + ASSERT_NE(src_, nullptr); + } + + void TearDown() override { aom_free(src_); } + void RunTest(bool is_random, int tx_size, int run_times); + + void GenRandomData(int width, int height, int stride) { + const int msb = 11; // Up to 12 bit input + const int limit = 1 << (msb + 1); + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + src_[ii * stride + jj] = rnd_(limit); + } + } + } + + void GenExtremeData(int width, int height, int stride, int16_t *data, + int16_t val) { + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + data[ii * stride + jj] = val; + } + } + } + + protected: + TestSSE_SumFuncs params_; + int16_t *src_; + ACMRandom rnd_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSE_Sum_Test); + +void SSE_Sum_Test::RunTest(bool is_random, int tx_size, int run_times) { + aom_usec_timer ref_timer, test_timer; + int width = tx_size_wide[tx_size]; + int height = tx_size_high[tx_size]; + for (int k = 0; k < 3; k++) { + int stride = 4 << rnd_(7); // Up to 256 stride + while (stride < width) { // Make sure it's valid + stride = 4 << rnd_(7); + } + if (is_random) { + GenRandomData(width, height, stride); + } else { + const int msb = 12; // Up to 12 bit input + const int limit = (1 << 
msb) - 1; + if (k == 0) { + GenExtremeData(width, height, stride, src_, limit); + } else { + GenExtremeData(width, height, stride, src_, -limit); + } + } + int sum_c = 0; + int64_t sse_intr = 0; + int sum_intr = 0; + int64_t sse_c = 0; + + params_.ref_func(src_, stride, width, height, &sum_c, &sse_c); + params_.tst_func(src_, stride, width, height, &sum_intr, &sse_intr); + + if (run_times > 1) { + aom_usec_timer_start(&ref_timer); + for (int j = 0; j < run_times; j++) { + params_.ref_func(src_, stride, width, height, &sum_c, &sse_c); + } + aom_usec_timer_mark(&ref_timer); + const int elapsed_time_c = + static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int j = 0; j < run_times; j++) { + params_.tst_func(src_, stride, width, height, &sum_intr, &sse_intr); + } + aom_usec_timer_mark(&test_timer); + const int elapsed_time_simd = + static_cast<int>(aom_usec_timer_elapsed(&test_timer)); + + printf( + "c_time=%d \t simd_time=%d \t " + "gain=%f\t width=%d\t height=%d \n", + elapsed_time_c, elapsed_time_simd, + (float)((float)elapsed_time_c / (float)elapsed_time_simd), width, + height); + + } else { + EXPECT_EQ(sum_c, sum_intr) + << "Error:" << k << " SSE Sum Test [" << width << "x" << height + << "] C output does not match optimized output."; + EXPECT_EQ(sse_c, sse_intr) + << "Error:" << k << " SSE Sum Test [" << width << "x" << height + << "] C output does not match optimized output."; + } + } +} + +TEST_P(SSE_Sum_Test, OperationCheck) { + RunTest(true, GET_PARAM(1), 1); // GenRandomData +} + +TEST_P(SSE_Sum_Test, ExtremeValues) { RunTest(false, GET_PARAM(1), 1); } + +TEST_P(SSE_Sum_Test, DISABLED_Speed) { RunTest(true, GET_PARAM(1), 10000); } + +#if HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON +const TX_SIZE kValidBlockSize[] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32, + TX_64X64, TX_4X8, TX_8X4, TX_8X16, + TX_16X8, TX_16X32, TX_32X16, TX_64X32, + TX_32X64, TX_4X16, TX_16X4, TX_8X32, + TX_32X8, TX_16X64, TX_64X16 }; +#endif + +#if 
HAVE_SSE2 +TestSSE_SumFuncs sse_sum_sse2[] = { TestSSE_SumFuncs( + &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_sse2) }; +INSTANTIATE_TEST_SUITE_P(SSE2, SSE_Sum_Test, + Combine(ValuesIn(sse_sum_sse2), + ValuesIn(kValidBlockSize))); +#endif // HAVE_SSE2 + +#if HAVE_AVX2 +TestSSE_SumFuncs sse_sum_avx2[] = { TestSSE_SumFuncs( + &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_avx2) }; +INSTANTIATE_TEST_SUITE_P(AVX2, SSE_Sum_Test, + Combine(ValuesIn(sse_sum_avx2), + ValuesIn(kValidBlockSize))); +#endif // HAVE_AVX2 + +#if HAVE_NEON +TestSSE_SumFuncs sse_sum_neon[] = { TestSSE_SumFuncs( + &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_neon) }; +INSTANTIATE_TEST_SUITE_P(NEON, SSE_Sum_Test, + Combine(ValuesIn(sse_sum_neon), + ValuesIn(kValidBlockSize))); +#endif // HAVE_NEON + +#if HAVE_SVE +TestSSE_SumFuncs sse_sum_sve[] = { TestSSE_SumFuncs(&aom_get_blk_sse_sum_c, + &aom_get_blk_sse_sum_sve) }; +INSTANTIATE_TEST_SUITE_P(SVE, SSE_Sum_Test, + Combine(ValuesIn(sse_sum_sve), + ValuesIn(kValidBlockSize))); +#endif // HAVE_SVE + +////////////////////////////////////////////////////////////////////////////// +// 2D Variance test functions +////////////////////////////////////////////////////////////////////////////// + +typedef uint64_t (*Var2DFunc)(uint8_t *src, int stride, int width, int height); +typedef libaom_test::FuncParam<Var2DFunc> TestFuncVar2D; + +const uint16_t test_block_size[2] = { 128, 256 }; + +class Lowbd2dVarTest : public ::testing::TestWithParam<TestFuncVar2D> { + public: + ~Lowbd2dVarTest() override = default; + void SetUp() override { + params_ = this->GetParam(); + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<uint8_t *>( + aom_memalign(16, 512 * 512 * sizeof(uint8_t))); + ASSERT_NE(src_, nullptr); + } + + void TearDown() override { aom_free(src_); } + void RunTest(bool is_random); + void RunSpeedTest(); + + void GenRandomData(int width, int height, int stride) { + const int msb = 7; // Up to 8 bit input + const int limit = 1 << (msb + 
1); + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + src_[ii * stride + jj] = rnd_(limit); + } + } + } + + void GenExtremeData(int width, int height, int stride) { + const int msb = 7; // Up to 8 bit input + const int limit = 1 << (msb + 1); + const int val = rnd_(2) ? limit - 1 : 0; + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + src_[ii * stride + jj] = val; + } + } + } + + protected: + TestFuncVar2D params_; + uint8_t *src_; + ACMRandom rnd_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Lowbd2dVarTest); + +void Lowbd2dVarTest::RunTest(bool is_random) { + int failed = 0; + for (int k = 0; k < kNumIterations; k++) { + const int width = 4 * (rnd_(63) + 1); // Up to 256x256 + const int height = 4 * (rnd_(63) + 1); // Up to 256x256 + int stride = 4 << rnd_(8); // Up to 512 stride + while (stride < width) { // Make sure it's valid + stride = 4 << rnd_(8); + } + if (is_random) { + GenRandomData(width, height, stride); + } else { + GenExtremeData(width, height, stride); + } + + const uint64_t res_ref = params_.ref_func(src_, stride, width, height); + uint64_t res_tst; + API_REGISTER_STATE_CHECK(res_tst = + params_.tst_func(src_, stride, width, height)); + + if (!failed) { + failed = res_ref != res_tst; + EXPECT_EQ(res_ref, res_tst) + << "Error: Sum Squares Test [" << width << "x" << height + << "] C output does not match optimized output."; + } + } +} + +void Lowbd2dVarTest::RunSpeedTest() { + for (int block = 0; block < 2; block++) { + const int width = test_block_size[block]; + const int height = test_block_size[block]; + int stride = 4 << rnd_(8); // Up to 512 stride + while (stride < width) { // Make sure it's valid + stride = 4 << rnd_(8); + } + GenExtremeData(width, height, stride); + const int num_loops = 1000000000 / (width + height); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + + for (int i = 0; i < num_loops; ++i) + params_.ref_func(src_, stride, width, height); + + 
aom_usec_timer_mark(&timer); + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + + aom_usec_timer timer1; + aom_usec_timer_start(&timer1); + for (int i = 0; i < num_loops; ++i) + params_.tst_func(src_, stride, width, height); + aom_usec_timer_mark(&timer1); + const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); + printf("%3dx%-3d: Scaling = %.2f\n", width, height, + (double)elapsed_time / elapsed_time1); + } +} + +TEST_P(Lowbd2dVarTest, OperationCheck) { + RunTest(true); // GenRandomData +} + +TEST_P(Lowbd2dVarTest, ExtremeValues) { + RunTest(false); // GenExtremeData +} + +TEST_P(Lowbd2dVarTest, DISABLED_Speed) { RunSpeedTest(); } + +#if HAVE_SSE2 + +INSTANTIATE_TEST_SUITE_P(SSE2, Lowbd2dVarTest, + ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c, + &aom_var_2d_u8_sse2))); + +#endif // HAVE_SSE2 + +#if HAVE_AVX2 + +INSTANTIATE_TEST_SUITE_P(AVX2, Lowbd2dVarTest, + ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c, + &aom_var_2d_u8_avx2))); + +#endif // HAVE_AVX2 + +#if HAVE_NEON + +INSTANTIATE_TEST_SUITE_P(NEON, Lowbd2dVarTest, + ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c, + &aom_var_2d_u8_neon))); + +#endif // HAVE_NEON + +#if HAVE_NEON_DOTPROD + +INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, Lowbd2dVarTest, + ::testing::Values(TestFuncVar2D( + &aom_var_2d_u8_c, &aom_var_2d_u8_neon_dotprod))); + +#endif // HAVE_NEON_DOTPROD + +class Highbd2dVarTest : public ::testing::TestWithParam<TestFuncVar2D> { + public: + ~Highbd2dVarTest() override = default; + void SetUp() override { + params_ = this->GetParam(); + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, 512 * 512 * sizeof(uint16_t))); + ASSERT_NE(src_, nullptr); + } + + void TearDown() override { aom_free(src_); } + void RunTest(bool is_random); + void RunSpeedTest(); + + void GenRandomData(int width, int height, int stride) { + const int msb = 11; // Up to 12 bit input + const int limit = 1 << (msb + 1); + for 
(int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + src_[ii * stride + jj] = rnd_(limit); + } + } + } + + void GenExtremeData(int width, int height, int stride) { + const int msb = 11; // Up to 12 bit input + const int limit = 1 << (msb + 1); + const int val = rnd_(2) ? limit - 1 : 0; + for (int ii = 0; ii < height; ii++) { + for (int jj = 0; jj < width; jj++) { + src_[ii * stride + jj] = val; + } + } + } + + protected: + TestFuncVar2D params_; + uint16_t *src_; + ACMRandom rnd_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Highbd2dVarTest); + +void Highbd2dVarTest::RunTest(bool is_random) { + int failed = 0; + for (int k = 0; k < kNumIterations; k++) { + const int width = 4 * (rnd_(63) + 1); // Up to 256x256 + const int height = 4 * (rnd_(63) + 1); // Up to 256x256 + int stride = 4 << rnd_(8); // Up to 512 stride + while (stride < width) { // Make sure it's valid + stride = 4 << rnd_(8); + } + if (is_random) { + GenRandomData(width, height, stride); + } else { + GenExtremeData(width, height, stride); + } + + const uint64_t res_ref = + params_.ref_func(CONVERT_TO_BYTEPTR(src_), stride, width, height); + uint64_t res_tst; + API_REGISTER_STATE_CHECK( + res_tst = + params_.tst_func(CONVERT_TO_BYTEPTR(src_), stride, width, height)); + + if (!failed) { + failed = res_ref != res_tst; + EXPECT_EQ(res_ref, res_tst) + << "Error: Sum Squares Test [" << width << "x" << height + << "] C output does not match optimized output."; + } + } +} + +void Highbd2dVarTest::RunSpeedTest() { + for (int block = 0; block < 2; block++) { + const int width = test_block_size[block]; + const int height = test_block_size[block]; + int stride = 4 << rnd_(8); // Up to 512 stride + while (stride < width) { // Make sure it's valid + stride = 4 << rnd_(8); + } + GenExtremeData(width, height, stride); + const int num_loops = 1000000000 / (width + height); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + + for (int i = 0; i < num_loops; ++i) + 
params_.ref_func(CONVERT_TO_BYTEPTR(src_), stride, width, height); + + aom_usec_timer_mark(&timer); + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + + aom_usec_timer timer1; + aom_usec_timer_start(&timer1); + for (int i = 0; i < num_loops; ++i) + params_.tst_func(CONVERT_TO_BYTEPTR(src_), stride, width, height); + aom_usec_timer_mark(&timer1); + const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); + printf("%3dx%-3d: Scaling = %.2f\n", width, height, + (double)elapsed_time / elapsed_time1); + } +} + +TEST_P(Highbd2dVarTest, OperationCheck) { + RunTest(true); // GenRandomData +} + +TEST_P(Highbd2dVarTest, ExtremeValues) { + RunTest(false); // GenExtremeData +} + +TEST_P(Highbd2dVarTest, DISABLED_Speed) { RunSpeedTest(); } + +#if HAVE_SSE2 + +INSTANTIATE_TEST_SUITE_P( + SSE2, Highbd2dVarTest, + ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_sse2))); + +#endif // HAVE_SSE2 + +#if HAVE_AVX2 + +INSTANTIATE_TEST_SUITE_P( + AVX2, Highbd2dVarTest, + ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_avx2))); + +#endif // HAVE_AVX2 + +#if HAVE_NEON + +INSTANTIATE_TEST_SUITE_P( + NEON, Highbd2dVarTest, + ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_neon))); + +#endif // HAVE_NEON + +#if HAVE_SVE + +INSTANTIATE_TEST_SUITE_P(SVE, Highbd2dVarTest, + ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, + &aom_var_2d_u16_sve))); + +#endif // HAVE_SVE +} // namespace diff --git a/third_party/aom/test/svc_datarate_test.cc b/third_party/aom/test/svc_datarate_test.cc new file mode 100644 index 0000000000..cc3fb674b3 --- /dev/null +++ b/third_party/aom/test/svc_datarate_test.cc @@ -0,0 +1,2675 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <climits> +#include <vector> +#include "config/aom_config.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/datarate_test.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "aom/aom_codec.h" +#include "av1/common/enums.h" +#include "av1/encoder/encoder.h" + +namespace datarate_test { +namespace { + +struct FrameInfo { + FrameInfo(aom_codec_pts_t _pts, unsigned int _w, unsigned int _h) + : pts(_pts), w(_w), h(_h) {} + + aom_codec_pts_t pts; + unsigned int w; + unsigned int h; +}; + +class DatarateTestSVC + : public ::libaom_test::CodecTestWith4Params<libaom_test::TestMode, int, + unsigned int, int>, + public DatarateTest { + public: + DatarateTestSVC() : DatarateTest(GET_PARAM(0)) { + set_cpu_used_ = GET_PARAM(2); + aq_mode_ = GET_PARAM(3); + } + + protected: + void SetUp() override { + InitializeConfig(GET_PARAM(1)); + ResetModel(); + } + + void DecompressedFrameHook(const aom_image_t &img, + aom_codec_pts_t pts) override { + frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h)); + ++decoded_nframes_; + } + + std::vector<FrameInfo> frame_info_list_; + + int GetNumSpatialLayers() override { return number_spatial_layers_; } + + void ResetModel() override { + DatarateTest::ResetModel(); + layer_frame_cnt_ = 0; + superframe_cnt_ = 0; + number_temporal_layers_ = 1; + number_spatial_layers_ = 1; + for (int i = 0; i < AOM_MAX_LAYERS; i++) { + target_layer_bitrate_[i] = 0; + effective_datarate_tl[i] = 0.0; + } + memset(&layer_id_, 0, 
sizeof(aom_svc_layer_id_t)); + memset(&svc_params_, 0, sizeof(aom_svc_params_t)); + memset(&ref_frame_config_, 0, sizeof(aom_svc_ref_frame_config_t)); + memset(&ref_frame_comp_pred_, 0, sizeof(aom_svc_ref_frame_comp_pred_t)); + drop_frames_ = 0; + for (int i = 0; i < 1000; i++) drop_frames_list_[i] = 1000; + decoded_nframes_ = 0; + mismatch_nframes_ = 0; + mismatch_psnr_ = 0.0; + set_frame_level_er_ = 0; + multi_ref_ = 0; + use_fixed_mode_svc_ = 0; + comp_pred_ = 0; + dynamic_enable_disable_mode_ = 0; + intra_only_ = 0; + frame_to_start_decoding_ = 0; + layer_to_decode_ = 0; + frame_sync_ = 0; + current_video_frame_ = 0; + screen_mode_ = 0; + rps_mode_ = 0; + rps_recovery_frame_ = 0; + user_define_frame_qp_ = 0; + set_speed_per_layer_ = false; + simulcast_mode_ = false; + } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + int spatial_layer_id = 0; + current_video_frame_ = video->frame(); + // video->frame() is called every superframe, so we should condition + // this on layer_frame_cnt_ = 0, so we only do this once on the very + // first frame. + if (video->frame() == 0 && layer_frame_cnt_ == 0) { + initialize_svc(number_temporal_layers_, number_spatial_layers_, + &svc_params_); + if (dynamic_enable_disable_mode_ == 1) { + svc_params_.layer_target_bitrate[2] = 0; + cfg_.rc_target_bitrate -= target_layer_bitrate_[2]; + } + encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_); + // TODO(aomedia:3032): Configure KSVC in fixed mode. 
+ encoder->Control(AV1E_SET_ENABLE_ORDER_HINT, 0); + encoder->Control(AV1E_SET_ENABLE_TPL_MODEL, 0); + encoder->Control(AV1E_SET_DELTAQ_MODE, 0); + if (cfg_.g_threads > 1) { + if (cfg_.g_threads == 4) { + encoder->Control(AV1E_SET_TILE_COLUMNS, 2); + encoder->Control(AV1E_SET_TILE_ROWS, 2); + } else if (cfg_.g_threads == 8) { + encoder->Control(AV1E_SET_TILE_COLUMNS, 4); + encoder->Control(AV1E_SET_TILE_ROWS, 2); + } else { + encoder->Control(AV1E_SET_TILE_COLUMNS, cfg_.g_threads >> 1); + } + encoder->Control(AV1E_SET_ROW_MT, 1); + } + if (screen_mode_) { + encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN); + } + } + if (number_spatial_layers_ == 2) { + spatial_layer_id = (layer_frame_cnt_ % 2 == 0) ? 0 : 1; + } else if (number_spatial_layers_ == 3) { + spatial_layer_id = (layer_frame_cnt_ % 3 == 0) ? 0 + : ((layer_frame_cnt_ - 1) % 3 == 0) ? 1 + : 2; + } + // Set the reference/update flags, layer_id, and reference_map + // buffer index. + frame_flags_ = set_layer_pattern( + video->frame(), &layer_id_, &ref_frame_config_, &ref_frame_comp_pred_, + spatial_layer_id, multi_ref_, comp_pred_, + (video->frame() % cfg_.kf_max_dist) == 0, dynamic_enable_disable_mode_, + rps_mode_, rps_recovery_frame_, simulcast_mode_); + if (intra_only_ == 1 && frame_sync_ > 0) { + // Set an Intra-only frame on SL0 at frame_sync_. + // In order to allow decoding to start on SL0 in mid-sequence we need to + // set and refresh all the slots used on SL0 stream, which is 0 and 3 + // for this test pattern. The other slots (1, 2, 4, 5) are used for the + // SL > 0 layers and these slots are not refreshed on frame_sync_, so + // temporal prediction for the top layers can continue. 
+ if (spatial_layer_id == 0 && video->frame() == frame_sync_) { + ref_frame_config_.ref_idx[0] = 0; + ref_frame_config_.ref_idx[3] = 3; + ref_frame_config_.refresh[0] = 1; + ref_frame_config_.refresh[3] = 1; + for (int i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config_.reference[i] = 0; + } + } + if (intra_only_ && video->frame() == 50 && spatial_layer_id == 1) { + // Force an intra_only frame here, for SL1. + for (int i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config_.reference[i] = 0; + } + encoder->Control(AV1E_SET_SVC_LAYER_ID, &layer_id_); + // The SET_SVC_REF_FRAME_CONFIG and AV1E_SET_SVC_REF_FRAME_COMP_PRED api is + // for the flexible SVC mode (i.e., use_fixed_mode_svc == 0). + if (!use_fixed_mode_svc_) { + encoder->Control(AV1E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config_); + encoder->Control(AV1E_SET_SVC_REF_FRAME_COMP_PRED, &ref_frame_comp_pred_); + } + if (set_speed_per_layer_) { + int speed_per_layer = 10; + if (layer_id_.spatial_layer_id == 0) { + // For base SL0,TL0: use the speed the test loops over. 
+ if (layer_id_.temporal_layer_id == 1) speed_per_layer = 7; + if (layer_id_.temporal_layer_id == 2) speed_per_layer = 8; + } else if (layer_id_.spatial_layer_id == 1) { + if (layer_id_.temporal_layer_id == 0) speed_per_layer = 7; + if (layer_id_.temporal_layer_id == 1) speed_per_layer = 8; + if (layer_id_.temporal_layer_id == 2) speed_per_layer = 9; + } else if (layer_id_.spatial_layer_id == 2) { + if (layer_id_.temporal_layer_id == 0) speed_per_layer = 8; + if (layer_id_.temporal_layer_id == 1) speed_per_layer = 9; + if (layer_id_.temporal_layer_id == 2) speed_per_layer = 10; + } + encoder->Control(AOME_SET_CPUUSED, speed_per_layer); + } + if (set_frame_level_er_) { + int mode = + (layer_id_.spatial_layer_id > 0 || layer_id_.temporal_layer_id > 0); + encoder->Control(AV1E_SET_ERROR_RESILIENT_MODE, mode); + } + if (dynamic_enable_disable_mode_ == 1) { + if (layer_frame_cnt_ == 300 && spatial_layer_id == 0) { + // Enable: set top spatial layer bitrate back to non-zero. + svc_params_.layer_target_bitrate[2] = target_layer_bitrate_[2]; + cfg_.rc_target_bitrate += target_layer_bitrate_[2]; + encoder->Config(&cfg_); + encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_); + } + } else if (dynamic_enable_disable_mode_ == 2) { + if (layer_frame_cnt_ == 300 && spatial_layer_id == 0) { + // Disable top spatial layer mid-stream. + svc_params_.layer_target_bitrate[2] = 0; + cfg_.rc_target_bitrate -= target_layer_bitrate_[2]; + encoder->Config(&cfg_); + encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_); + } else if (layer_frame_cnt_ == 600 && spatial_layer_id == 0) { + // Enable top spatial layer mid-stream. 
+ svc_params_.layer_target_bitrate[2] = target_layer_bitrate_[2]; + cfg_.rc_target_bitrate += target_layer_bitrate_[2]; + encoder->Config(&cfg_); + encoder->Control(AV1E_SET_SVC_PARAMS, &svc_params_); + } + } + layer_frame_cnt_++; + DatarateTest::PreEncodeFrameHook(video, encoder); + + if (user_define_frame_qp_) { + frame_qp_ = rnd_.PseudoUniform(63); + encoder->Control(AV1E_SET_QUANTIZER_ONE_PASS, frame_qp_); + } + } + + void PostEncodeFrameHook(::libaom_test::Encoder *encoder) override { + int num_operating_points; + encoder->Control(AV1E_GET_NUM_OPERATING_POINTS, &num_operating_points); + ASSERT_EQ(num_operating_points, + number_temporal_layers_ * number_spatial_layers_); + + if (user_define_frame_qp_) { + if (current_video_frame_ >= static_cast<unsigned int>(total_frame_)) + return; + int qp; + encoder->Control(AOME_GET_LAST_QUANTIZER_64, &qp); + ASSERT_EQ(qp, frame_qp_); + } + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + const size_t frame_size_in_bits = pkt->data.frame.sz * 8; + // Update the layer cumulative bitrate. + for (int i = layer_id_.temporal_layer_id; i < number_temporal_layers_; + i++) { + int layer = layer_id_.spatial_layer_id * number_temporal_layers_ + i; + effective_datarate_tl[layer] += 1.0 * frame_size_in_bits; + } + if (layer_id_.spatial_layer_id == number_spatial_layers_ - 1) { + last_pts_ = pkt->data.frame.pts; + superframe_cnt_++; + } + // For simulcast mode: verify that for first frame to start decoding, + // for SL > 0, are Intra-only frames (not Key), whereas SL0 is Key. 
+ if (simulcast_mode_ && superframe_cnt_ == (int)frame_to_start_decoding_) { + if (layer_id_.spatial_layer_id > 0) { + EXPECT_NE(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + } else if (layer_id_.spatial_layer_id == 0) { + EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + } + } + } + + void EndPassHook() override { + duration_ = ((last_pts_ + 1) * timebase_); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + effective_datarate_tl[i] = (effective_datarate_tl[i] / 1000) / duration_; + } + } + + bool DoDecode() const override { + if (drop_frames_ > 0) { + for (unsigned int i = 0; i < drop_frames_; ++i) { + if (drop_frames_list_[i] == (unsigned int)superframe_cnt_) { + std::cout << " Skipping decoding frame: " + << drop_frames_list_[i] << "\n"; + return false; + } + } + } else if (intra_only_ == 1) { + // Only start decoding at frames_to_start_decoding_. + if (current_video_frame_ < frame_to_start_decoding_) return false; + // Only decode base layer for 3SL, for layer_to_decode_ = 0. + if (layer_to_decode_ == 0 && frame_sync_ > 0 && + (layer_frame_cnt_ - 1) % 3 != 0) + return false; + } else if (simulcast_mode_) { + // Only start decoding at frames_to_start_decoding_ and only + // for top spatial layer SL2 (layer_to_decode_). 
+ if (current_video_frame_ < frame_to_start_decoding_) return false; + if (layer_id_.spatial_layer_id < (int)layer_to_decode_) return false; + } + return true; + } + + void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) override { + double mismatch_psnr = compute_psnr(img1, img2); + mismatch_psnr_ += mismatch_psnr; + ++mismatch_nframes_; + } + + unsigned int GetMismatchFrames() { return mismatch_nframes_; } + unsigned int GetDecodedFrames() { return decoded_nframes_; } + + static void ref_config_rps(aom_svc_ref_frame_config_t *ref_frame_config, + int frame_cnt, int rps_recovery_frame) { + // Pattern of 3 references with (ALTREF and GOLDEN) trailing + // LAST by 4 and 8 frame, with some switching logic to + // only predict from longer-term reference. + int last_idx = 0; + int last_idx_refresh = 0; + int gld_idx = 0; + int alt_ref_idx = 0; + const int lag_alt = 4; + const int lag_gld = 8; + const int sh = 8; // slots 0 - 7. + // Moving index slot for last: 0 - (sh - 1) + if (frame_cnt > 1) last_idx = (frame_cnt - 1) % sh; + // Moving index for refresh of last: one ahead for next frame. + last_idx_refresh = frame_cnt % sh; + // Moving index for gld_ref, lag behind current by lag_gld + if (frame_cnt > lag_gld) gld_idx = (frame_cnt - lag_gld) % sh; + // Moving index for alt_ref, lag behind LAST by lag_alt frames. + if (frame_cnt > lag_alt) alt_ref_idx = (frame_cnt - lag_alt) % sh; + // Set the ref_idx. + // Default all references (7) to slot for last. + // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3), + // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6). + for (int i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = last_idx; + // Set the ref_idx for the relevant references. + ref_frame_config->ref_idx[0] = last_idx; + ref_frame_config->ref_idx[1] = last_idx_refresh; + ref_frame_config->ref_idx[3] = gld_idx; + ref_frame_config->ref_idx[6] = alt_ref_idx; + // Refresh this slot, which will become LAST on next frame. 
+ ref_frame_config->refresh[last_idx_refresh] = 1; + // Reference LAST, ALTREF, and GOLDEN + ref_frame_config->reference[0] = 1; + ref_frame_config->reference[6] = 1; + ref_frame_config->reference[3] = 1; + if (frame_cnt == rps_recovery_frame) { + // Switch to only reference GOLDEN at recovery_frame. + ref_frame_config->reference[0] = 0; + ref_frame_config->reference[6] = 0; + ref_frame_config->reference[3] = 1; + } else if (frame_cnt > rps_recovery_frame && + frame_cnt < rps_recovery_frame + 8) { + // Go back to predicting from LAST, and after + // 8 frames (GOLDEN is 8 frames away) go back + // to predicting off GOLDEN and ALTREF. + ref_frame_config->reference[0] = 1; + ref_frame_config->reference[6] = 0; + ref_frame_config->reference[3] = 0; + } + } + + // Simulcast mode for 3 spatial and 3 temporal layers. + // No inter-layer prediction, only prediction is temporal and single + // reference (LAST). + // No overlap in buffer slots between spatial layers. So for example, + // SL0 only uses slots 0 and 1. + // SL1 only uses slots 2 and 3. + // SL2 only uses slots 4 and 5. + // All 7 references for each inter-frame must only access buffer slots + // for that spatial layer. + // On key (super)frames: SL1 and SL2 must have no references set + // and must refresh all the slots for that layer only (so 2 and 3 + // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally + // as a Key frame (refresh all slots). SL1/SL2 will be labelled + // internally as Intra-only frames that allow that stream to be decoded. + // These conditions will allow for each spatial stream to be + // independently decodable. + static void ref_config_simulcast3SL3TL( + aom_svc_ref_frame_config_t *ref_frame_config, + aom_svc_layer_id_t *layer_id, int is_key_frame, int superframe_cnt) { + int i; + // Initialize all references to 0 (don't use reference). + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->reference[i] = 0; + // Initialize as no refresh/update for all slots. 
+ for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = 0; + + if (is_key_frame) { + if (layer_id->spatial_layer_id == 0) { + // Assign LAST/GOLDEN to slot 0/1. + // Refresh slots 0 and 1 for SL0. + // SL0: this will get set to KEY frame internally. + ref_frame_config->ref_idx[0] = 0; + ref_frame_config->ref_idx[3] = 1; + ref_frame_config->refresh[0] = 1; + ref_frame_config->refresh[1] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Assign LAST/GOLDEN to slot 2/3. + // Refresh slots 2 and 3 for SL1. + // This will get set to Intra-only frame internally. + ref_frame_config->ref_idx[0] = 2; + ref_frame_config->ref_idx[3] = 3; + ref_frame_config->refresh[2] = 1; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Assign LAST/GOLDEN to slot 4/5. + // Refresh slots 4 and 5 for SL2. + // This will get set to Intra-only frame internally. + ref_frame_config->ref_idx[0] = 4; + ref_frame_config->ref_idx[3] = 5; + ref_frame_config->refresh[4] = 1; + ref_frame_config->refresh[5] = 1; + } + } else if (superframe_cnt % 4 == 0) { + // Base temporal layer: TL0 + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { // SL0 + // Reference LAST. Assign all references to either slot + // 0 or 1. Here we assign LAST to slot 0, all others to 1. + // Update slot 0 (LAST). + ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[0] = 0; + ref_frame_config->refresh[0] = 1; + } else if (layer_id->spatial_layer_id == 1) { // SL1 + // Reference LAST. Assign all references to either slot + // 2 or 3. Here we assign LAST to slot 2, all others to 3. + // Update slot 2 (LAST). 
+ ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 3; + ref_frame_config->ref_idx[0] = 2; + ref_frame_config->refresh[2] = 1; + } else if (layer_id->spatial_layer_id == 2) { // SL2 + // Reference LAST. Assign all references to either slot + // 4 or 5. Here we assign LAST to slot 4, all others to 5. + // Update slot 4 (LAST). + ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 5; + ref_frame_config->ref_idx[0] = 4; + ref_frame_config->refresh[4] = 1; + } + } else if ((superframe_cnt - 1) % 4 == 0) { + // First top temporal enhancement layer: TL2 + layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { // SL0 + // Reference LAST (slot 0). Assign other references to slot 1. + // No update/refresh on any slots. + ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[0] = 0; + } else if (layer_id->spatial_layer_id == 1) { // SL1 + // Reference LAST (slot 2). Assign other references to slot 3. + // No update/refresh on any slots. + ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 3; + ref_frame_config->ref_idx[0] = 2; + } else if (layer_id->spatial_layer_id == 2) { // SL2 + // Reference LAST (slot 4). Assign other references to slot 5. + // No update/refresh on any slots. + ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 5; + ref_frame_config->ref_idx[0] = 4; + } + } else if ((superframe_cnt - 2) % 4 == 0) { + // Middle temporal enhancement layer: TL1 + layer_id->temporal_layer_id = 1; + if (layer_id->spatial_layer_id == 0) { // SL0 + // Reference LAST (slot 0). + // Set GOLDEN to slot 1 and update slot 1. + // This will be used as reference for next TL2. 
+ ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[0] = 0; + ref_frame_config->refresh[1] = 1; + } else if (layer_id->spatial_layer_id == 1) { // SL1 + // Reference LAST (slot 2). + // Set GOLDEN to slot 3 and update slot 3. + // This will be used as reference for next TL2. + ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 3; + ref_frame_config->ref_idx[0] = 2; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 2) { // SL2 + // Reference LAST (slot 4). + // Set GOLDEN to slot 5 and update slot 5. + // This will be used as reference for next TL2. + ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 5; + ref_frame_config->ref_idx[0] = 4; + ref_frame_config->refresh[5] = 1; + } + } else if ((superframe_cnt - 3) % 4 == 0) { + // Second top temporal enhancement layer: TL2 + layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { // SL0 + // Reference LAST (slot 1). Assign other references to slot 0. + // No update/refresh on any slots. + ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[0] = 1; + } else if (layer_id->spatial_layer_id == 1) { // SL1 + // Reference LAST (slot 3). Assign other references to slot 2. + // No update/refresh on any slots. + ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 2; + ref_frame_config->ref_idx[0] = 3; + } else if (layer_id->spatial_layer_id == 2) { // SL2 + // Reference LAST (slot 5). Assign other references to slot 4. + // No update/refresh on any slots. 
+ ref_frame_config->reference[0] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 4; + ref_frame_config->ref_idx[0] = 5; + } + } + } + + // 3 spatial and 3 temporal layer. + // Overlap in the buffer slot updates: the slots 3 and 4 updated by + // first TL2 are reused for update in TL1 superframe. + static void ref_config_3SL3TL(aom_svc_ref_frame_config_t *ref_frame_config, + aom_svc_layer_id_t *layer_id, int is_key_frame, + int superframe_cnt) { + if (superframe_cnt % 4 == 0) { + // Base temporal layer. + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST, update LAST. + // Set all buffer_idx to 0. + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0; + ref_frame_config->refresh[0] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, + // GOLDEN (and all other refs) to slot 0. + // Update slot 1 (LAST). + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[0] = 1; + ref_frame_config->refresh[1] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2, + // GOLDEN (and all other refs) to slot 1. + // Update slot 2 (LAST). + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[0] = 2; + ref_frame_config->refresh[2] = 1; + } + } else if ((superframe_cnt - 1) % 4 == 0) { + // First top temporal enhancement layer. + layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST (slot 0). + // Set GOLDEN to slot 3 and update slot 3. + // Set all other buffer_idx to slot 0. + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[3] = 3; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. 
Set buffer_idx for LAST to slot 1, + // GOLDEN (and all other refs) to slot 3. + // Set LAST2 to slot 4 and Update slot 4. + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 3; + ref_frame_config->ref_idx[0] = 1; + ref_frame_config->ref_idx[1] = 4; + ref_frame_config->refresh[4] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2, + // GOLDEN (and all other refs) to slot 4. + // No update. + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 4; + ref_frame_config->ref_idx[0] = 2; + } + } else if ((superframe_cnt - 2) % 4 == 0) { + // Middle temporal enhancement layer. + layer_id->temporal_layer_id = 1; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST. + // Set all buffer_idx to 0. + // Set GOLDEN to slot 3 and update slot 3. + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[3] = 3; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, + // GOLDEN (and all other refs) to slot 3. + // Set LAST2 to slot 4 and update slot 4. + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 3; + ref_frame_config->ref_idx[0] = 1; + ref_frame_config->ref_idx[2] = 4; + ref_frame_config->refresh[4] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2, + // GOLDEN (and all other refs) to slot 4. + // Set LAST2 to slot 5 and update slot 5. + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 4; + ref_frame_config->ref_idx[0] = 2; + ref_frame_config->ref_idx[2] = 5; + ref_frame_config->refresh[5] = 1; + } + } else if ((superframe_cnt - 3) % 4 == 0) { + // Second top temporal enhancement layer. + layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { + // Set LAST to slot 3 and reference LAST. + // Set GOLDEN to slot 3 and update slot 3. 
+ // Set all other buffer_idx to 0. + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[0] = 3; + ref_frame_config->ref_idx[3] = 3; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 4, + // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4. + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[0] = 4; + ref_frame_config->ref_idx[3] = 3; + ref_frame_config->ref_idx[1] = 4; + ref_frame_config->refresh[4] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 5, + // GOLDEN to slot 4. No update. + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[0] = 5; + ref_frame_config->ref_idx[3] = 4; + } + } + if (layer_id->spatial_layer_id > 0) { + // Always reference GOLDEN (inter-layer prediction). + ref_frame_config->reference[3] = 1; + if (is_key_frame && layer_id->spatial_layer_id > 0) { + // On superframes whose base is key: remove LAST since GOLDEN + // is used as reference. + ref_frame_config->reference[0] = 0; + } + } + } + + // Layer pattern configuration. + virtual int set_layer_pattern( + int frame_cnt, aom_svc_layer_id_t *layer_id, + aom_svc_ref_frame_config_t *ref_frame_config, + aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int spatial_layer, + int multi_ref, int comp_pred, int is_key_frame, + int dynamic_enable_disable_mode, int rps_mode, int rps_recovery_frame, + int simulcast_mode) { + int lag_index = 0; + int base_count = frame_cnt >> 2; + layer_id->spatial_layer_id = spatial_layer; + // Set the reference map buffer idx for the 7 references: + // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3), + // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6). 
+ for (int i = 0; i < INTER_REFS_PER_FRAME; i++) { + ref_frame_config->ref_idx[i] = i; + ref_frame_config->reference[i] = 0; + } + for (int i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0; + if (comp_pred) { + ref_frame_comp_pred->use_comp_pred[0] = 1; // GOLDEN_LAST + ref_frame_comp_pred->use_comp_pred[1] = 1; // LAST2_LAST + ref_frame_comp_pred->use_comp_pred[2] = 1; // ALTREF_LAST + } + // Set layer_flags to 0 when using ref_frame_config->reference. + int layer_flags = 0; + // Always reference LAST. + ref_frame_config->reference[0] = 1; + if (number_temporal_layers_ == 1 && number_spatial_layers_ == 1) { + ref_frame_config->refresh[0] = 1; + if (rps_mode) + ref_config_rps(ref_frame_config, frame_cnt, rps_recovery_frame); + } + if (number_temporal_layers_ == 2 && number_spatial_layers_ == 1) { + // 2-temporal layer. + // 1 3 5 + // 0 2 4 + // Keep golden fixed at slot 3. + base_count = frame_cnt >> 1; + ref_frame_config->ref_idx[3] = 3; + // Cyclically refresh slots 5, 6, 7, for lag alt ref. + lag_index = 5; + if (base_count > 0) { + lag_index = 5 + (base_count % 3); + if (frame_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3); + } + // Set the altref slot to lag_index. + ref_frame_config->ref_idx[6] = lag_index; + if (frame_cnt % 2 == 0) { + layer_id->temporal_layer_id = 0; + // Update LAST on layer 0, reference LAST. + ref_frame_config->refresh[0] = 1; + ref_frame_config->reference[0] = 1; + // Refresh lag_index slot, needed for lagging altref. + ref_frame_config->refresh[lag_index] = 1; + // Refresh GOLDEN every x base layer frames. + if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1; + } else { + layer_id->temporal_layer_id = 1; + // No updates on layer 1, reference LAST (TL0). + ref_frame_config->reference[0] = 1; + } + // Always reference golden and altref on TL0. 
+ if (layer_id->temporal_layer_id == 0) { + ref_frame_config->reference[3] = 1; + ref_frame_config->reference[6] = 1; + } + } else if (number_temporal_layers_ == 3 && number_spatial_layers_ == 1) { + // 3-layer: + // 1 3 5 7 + // 2 6 + // 0 4 8 + if (multi_ref) { + // Keep golden fixed at slot 3. + ref_frame_config->ref_idx[3] = 3; + // Cyclically refresh slots 4, 5, 6, 7, for lag altref. + lag_index = 4 + (base_count % 4); + // Set the altref slot to lag_index. + ref_frame_config->ref_idx[6] = lag_index; + } + if (frame_cnt % 4 == 0) { + // Base layer. + layer_id->temporal_layer_id = 0; + // Update LAST on layer 0, reference LAST and GF. + ref_frame_config->refresh[0] = 1; + ref_frame_config->reference[3] = 1; + if (multi_ref) { + // Refresh GOLDEN every x ~10 base layer frames. + if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1; + // Refresh lag_index slot, needed for lagging altref. + ref_frame_config->refresh[lag_index] = 1; + } + } else if ((frame_cnt - 1) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // First top layer: no updates, only reference LAST (TL0). + } else if ((frame_cnt - 2) % 4 == 0) { + layer_id->temporal_layer_id = 1; + // Middle layer (TL1): update LAST2, only reference LAST (TL0). + ref_frame_config->refresh[1] = 1; + } else if ((frame_cnt - 3) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // Second top layer: no updates, only reference LAST. + // Set buffer idx for LAST to slot 1, since that was the slot + // updated in previous frame. So LAST is TL1 frame. + ref_frame_config->ref_idx[0] = 1; + ref_frame_config->ref_idx[1] = 0; + } + if (multi_ref) { + // Every frame can reference GOLDEN AND ALTREF. + ref_frame_config->reference[3] = 1; + ref_frame_config->reference[6] = 1; + } + } else if (number_temporal_layers_ == 1 && number_spatial_layers_ == 2) { + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST, update LAST. Keep LAST and GOLDEN in slots 0 and 3. 
+ ref_frame_config->ref_idx[0] = 0; + ref_frame_config->ref_idx[3] = 3; + ref_frame_config->refresh[0] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 3 + // and GOLDEN to slot 0. Update slot 3 (LAST). + ref_frame_config->ref_idx[0] = 3; + ref_frame_config->ref_idx[3] = 0; + ref_frame_config->refresh[3] = 1; + } + // Reference GOLDEN. + if (layer_id->spatial_layer_id > 0) ref_frame_config->reference[3] = 1; + } else if (number_temporal_layers_ == 1 && number_spatial_layers_ == 3) { + // 3 spatial layers, 1 temporal. + // Note for this case , we set the buffer idx for all references to be + // either LAST or GOLDEN, which are always valid references, since decoder + // will check if any of the 7 references is valid scale in + // valid_ref_frame_size(). + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST, update LAST. Set all other buffer_idx to 0. + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0; + ref_frame_config->refresh[0] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1 + // and GOLDEN (and all other refs) to slot 0. + // Update slot 1 (LAST). + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[0] = 1; + ref_frame_config->refresh[1] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2 + // and GOLDEN (and all other refs) to slot 1. + // Update slot 2 (LAST). + for (int i = 0; i < 7; i++) ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[0] = 2; + ref_frame_config->refresh[2] = 1; + if (multi_ref) { + ref_frame_config->ref_idx[6] = 7; + ref_frame_config->reference[6] = 1; + if (base_count % 10 == 0) ref_frame_config->refresh[7] = 1; + } + } + // Reference GOLDEN. 
+ if (layer_id->spatial_layer_id > 0) ref_frame_config->reference[3] = 1; + } else if (number_temporal_layers_ == 3 && number_spatial_layers_ == 3) { + if (simulcast_mode) { + ref_config_simulcast3SL3TL(ref_frame_config, layer_id, is_key_frame, + superframe_cnt_); + } else { + ref_config_3SL3TL(ref_frame_config, layer_id, is_key_frame, + superframe_cnt_); + // Allow for top spatial layer to use additional temporal reference. + // Additional reference is only updated on base temporal layer, every + // 10 TL0 frames here. + if (multi_ref && layer_id->spatial_layer_id == 2) { + ref_frame_config->ref_idx[6] = 7; + if (!is_key_frame) ref_frame_config->reference[6] = 1; + if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0) + ref_frame_config->refresh[7] = 1; + } + } + } + // If the top spatial layer is first-time encoded in mid-sequence + // (i.e., dynamic_enable_disable_mode = 1), then don't predict from LAST, + // since it will have been last updated on first key frame (SL0) and so + // be different resolution from SL2. + if (dynamic_enable_disable_mode == 1 && + layer_id->spatial_layer_id == number_spatial_layers_ - 1) + ref_frame_config->reference[0] = 0; + return layer_flags; + } + + virtual void initialize_svc(int number_temporal_layers, + int number_spatial_layers, + aom_svc_params *svc_params) { + svc_params->number_spatial_layers = number_spatial_layers; + svc_params->number_temporal_layers = number_temporal_layers; + for (int i = 0; i < number_temporal_layers * number_spatial_layers; ++i) { + svc_params->max_quantizers[i] = 60; + svc_params->min_quantizers[i] = 2; + svc_params->layer_target_bitrate[i] = target_layer_bitrate_[i]; + } + // Do at most 3 spatial or temporal layers here. 
+ svc_params->framerate_factor[0] = 1; + if (number_temporal_layers == 2) { + svc_params->framerate_factor[0] = 2; + svc_params->framerate_factor[1] = 1; + } else if (number_temporal_layers == 3) { + svc_params->framerate_factor[0] = 4; + svc_params->framerate_factor[1] = 2; + svc_params->framerate_factor[2] = 1; + } + svc_params->scaling_factor_num[0] = 1; + svc_params->scaling_factor_den[0] = 1; + if (number_spatial_layers == 2) { + svc_params->scaling_factor_num[0] = 1; + svc_params->scaling_factor_den[0] = 2; + svc_params->scaling_factor_num[1] = 1; + svc_params->scaling_factor_den[1] = 1; + } else if (number_spatial_layers == 3) { + svc_params->scaling_factor_num[0] = 1; + svc_params->scaling_factor_den[0] = 4; + svc_params->scaling_factor_num[1] = 1; + svc_params->scaling_factor_den[1] = 2; + svc_params->scaling_factor_num[2] = 1; + svc_params->scaling_factor_den[2] = 1; + } + } + + virtual void BasicRateTargetingSVC3TL1SLTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 1; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 200, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 3; + target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The 
datarate for the file is greater than target by too much!"; + } + // Top temporal layers are non_reference, so exclude them from + // mismatch count, since loopfilter/cdef is not applied for these on + // encoder side, but is always applied on decoder. + // This means 150 = #frames(300) - #TL2_frames(150). + EXPECT_EQ((int)GetMismatchFrames(), 150); + } + + virtual void SetFrameQpSVC3TL1SLTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 1; + + user_define_frame_qp_ = 1; + total_frame_ = 300; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 200, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 3; + target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + virtual void SetFrameQpSVC3TL3SLTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + user_define_frame_qp_ = 1; + total_frame_ = 300; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 600, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * 
bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + virtual void BasicRateTargetingSVC3TL1SLScreenTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 60); + + const int bitrate_array[2] = { 800, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + screen_mode_ = 1; + number_temporal_layers_ = 3; + number_spatial_layers_ = 1; + target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.50) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.7) + << " The datarate for the file is greater than target by too much!"; + } + // Top temporal layers are non_reference, so exclude them from + // mismatch count, since loopfilter/cdef is not applied for these on + // encoder side, but is always applied on decoder. + // This means 30 = #frames(60) - #TL2_frames(30). 
+ // We use LE for screen since loopfilter level can become very small + // or zero and then the frame is not a mismatch. + EXPECT_LE((int)GetMismatchFrames(), 30); + } + + virtual void BasicRateTargetingSVC2TL1SLScreenDropFrameTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 30; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 52; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + + const int bitrate_array[2] = { 60, 100 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + screen_mode_ = 1; + number_temporal_layers_ = 2; + number_spatial_layers_ = 1; + target_layer_bitrate_[0] = 60 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.75) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.5) + << " The datarate for the file is greater than target by too much!"; + } + // Top temporal layers are non_reference, so exclude them from + // mismatch count, since loopfilter/cdef is not applied for these on + // encoder side, but is always applied on decoder. + // This means 150 = #frames(300) - #TL1_frames(150). + // We use LE for screen since loopfilter level can become very small + // or zero and then the frame is not a mismatch. 
+ EXPECT_LE((int)GetMismatchFrames(), 150); + } + + virtual void BasicRateTargetingSVC1TL3SLScreenTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + + const int bitrate_array[2] = { 800, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + screen_mode_ = 1; + number_temporal_layers_ = 1; + number_spatial_layers_ = 3; + target_layer_bitrate_[0] = 30 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 60 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.50) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.5) + << " The datarate for the file is greater than target by too much!"; + } + EXPECT_EQ((int)GetMismatchFrames(), 0); + } + + virtual void BasicRateTargetingSVC1TL1SLScreenScCutsMotionTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + + const int bitrate_array[2] = { 200, 500 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + screen_mode_ = 1; + number_temporal_layers_ = 1; + number_spatial_layers_ = 1; + target_layer_bitrate_[0] = cfg_.rc_target_bitrate; + 
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.40) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.7) + << " The datarate for the file is greater than target by too much!"; + } + EXPECT_EQ((int)GetMismatchFrames(), 0); + } + + virtual void BasicRateTargetingSVC3TL1SLResizeTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + cfg_.rc_resize_mode = RESIZE_DYNAMIC; + + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, 400); + cfg_.g_w = 640; + cfg_.g_h = 480; + const int bitrate_array[2] = { 80, 90 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 3; + target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + unsigned int last_w = cfg_.g_w; + unsigned int last_h = cfg_.g_h; + int resize_down_count = 0; + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + if (info->w != last_w || info->h != last_h) { + // Verify that resize down occurs. 
+ ASSERT_LT(info->w, last_w); + ASSERT_LT(info->h, last_h); + last_w = info->w; + last_h = info->h; + resize_down_count++; + } + } + // Must be at least one resize down. + ASSERT_GE(resize_down_count, 1); + } + + virtual void BasicRateTargetingSVC1TL2SLTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 300, 600 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 1; + number_spatial_layers_ = 2; + target_layer_bitrate_[0] = 2 * cfg_.rc_target_bitrate / 4; + target_layer_bitrate_[1] = 2 * cfg_.rc_target_bitrate / 4; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC3TL3SLIntraStartDecodeBaseMidSeq() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 500, 1000 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + intra_only_ = 1; + frame_sync_ = 20; + frame_to_start_decoding_ = frame_sync_; + 
layer_to_decode_ = 0; + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // Only check datarate on SL0 - this is layer that is decoded starting at + // frame_to_start_decoding_. + for (int i = 0; i < number_temporal_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.50) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + // Only base spatial layer is decoded and there are no non-reference + // frames on SL0, so #mismatch must be 0. 
+ EXPECT_EQ((int)GetMismatchFrames(), 0); + } + + virtual void BasicRateTargetingSVC3TL3SLIntraMidSeqDecodeAll() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 500, 1000 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + intra_only_ = 1; + frame_sync_ = 20; + frame_to_start_decoding_ = 0; + layer_to_decode_ = 3; + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.585) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + // All 3 spatial layers are decoded, starting at frame 0, so there are + // 300/2 = 150 non-reference frames, so mismatch is 150. 
+ EXPECT_EQ((int)GetMismatchFrames(), 150); + } + + virtual void BasicRateTargetingSVC3TL3SLSimulcast() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + cfg_.kf_max_dist = 150; + cfg_.kf_min_dist = 150; + int num_frames = 300; + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, num_frames); + const int bitrate_array[2] = { 500, 1000 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + simulcast_mode_ = 1; + frame_to_start_decoding_ = cfg_.kf_max_dist; + layer_to_decode_ = 2; // SL2 + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // Only SL2 layer is decoded. 
+ for (int tl = 0; tl < number_temporal_layers_; tl++) { + int i = layer_to_decode_ * number_temporal_layers_ + tl; + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.6) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.7) + << " The datarate for the file is greater than target by too much!"; + } + // Only top spatial layer (SL2) is decoded, starting at frame 150 + // (frame_to_start_decoding_), so there (300 - 150) / 2 = 75 + // non-reference frames, so mismatch is 75. + int num_mismatch = (num_frames - frame_to_start_decoding_) / 2; + EXPECT_EQ((int)GetMismatchFrames(), num_mismatch); + } + + virtual void BasicRateTargetingSVC1TL2SLIntraOnlyTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 300, 600 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + intra_only_ = 1; + number_temporal_layers_ = 1; + number_spatial_layers_ = 2; + target_layer_bitrate_[0] = 2 * cfg_.rc_target_bitrate / 4; + target_layer_bitrate_[1] = 2 * cfg_.rc_target_bitrate / 4; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC1TL3SLTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 
1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 500, 1000 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 1; + number_spatial_layers_ = 3; + target_layer_bitrate_[0] = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[1] = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[2] = 4 * cfg_.rc_target_bitrate / 8; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC1TL3SLMultiRefTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 500, 1000 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + multi_ref_ = 1; + number_temporal_layers_ = 1; + number_spatial_layers_ = 3; + target_layer_bitrate_[0] = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[1] = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[2] = 4 * cfg_.rc_target_bitrate / 8; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + 
ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC3TL3SLTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 600, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.50) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC3TL3SLHDTest() 
{ + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + const int bitrate_array[2] = { 600, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingFixedModeSVC3TL3SLHDTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + const 
int bitrate_array[2] = { 600, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + use_fixed_mode_svc_ = 1; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC3TL3SLMultiThreadSpeedPerLayerTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + cfg_.g_threads = 2; + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, 400); + cfg_.g_w = 640; + cfg_.g_h = 480; + const int bitrate_array[2] = { 600, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + set_speed_per_layer_ = true; + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 
* cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC3TL3SLHDMultiThread2Test() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + cfg_.g_threads = 2; + + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + const int bitrate_array[2] = { 600, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 
* bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC3TL3SLHDMultiThread4Test() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + cfg_.g_threads = 4; + + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + const int bitrate_array[2] = { 600, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for 
(int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC3TL3SLHDMultiRefTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + const int bitrate_array[2] = { 600, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + multi_ref_ = 1; + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45) + << " The datarate for the file is greater than 
target by too much!"; + } + } + + virtual void BasicRateTargetingSVC3TL3SLKfTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + cfg_.kf_mode = AOM_KF_AUTO; + cfg_.kf_min_dist = cfg_.kf_max_dist = 100; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 600, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.55) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.4) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargeting444SVC3TL3SLTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 
63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + cfg_.g_profile = 1; + + ::libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140); + + const int bitrate_array[2] = { 600, 1200 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC3TL1SLMultiRefDropAllEnhTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + // error_resilient can set to off/0, since for SVC the context update + // is done per-layer. 
+ cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 200, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + multi_ref_ = 1; + // Drop TL1 and TL2: #frames(300) - #TL0. + drop_frames_ = 300 - 300 / 4; + int n = 0; + for (int i = 0; i < 300; i++) { + if (i % 4 != 0) { + drop_frames_list_[n] = i; + n++; + } + } + number_temporal_layers_ = 3; + target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + // Test that no mismatches have been found. + std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_); + EXPECT_EQ((int)GetMismatchFrames(), 0); + } + + virtual void BasicRateTargetingSVC3TL1SLDropAllEnhTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + // error_resilient can set to off/0, since for SVC the context update + // is done per-layer. 
+ cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 200, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + // Drop TL1 and TL2: #frames(300) - #TL0. + drop_frames_ = 300 - 300 / 4; + int n = 0; + for (int i = 0; i < 300; i++) { + if (i % 4 != 0) { + drop_frames_list_[n] = i; + n++; + } + } + number_temporal_layers_ = 3; + target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + // Test that no mismatches have been found. + std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_); + EXPECT_EQ((int)GetMismatchFrames(), 0); + } + + virtual void BasicRateTargetingSVC3TL1SLDropTL2EnhTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + // error_resilient for sequence can be off/0, since dropped frames (TL2) + // are non-reference frames. 
+ cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 200, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + // Drop TL2: #frames(300) - (#TL0 + #TL1). + drop_frames_ = 300 - 300 / 2; + int n = 0; + for (int i = 0; i < 300; i++) { + if (i % 2 != 0) { + drop_frames_list_[n] = i; + n++; + } + } + number_temporal_layers_ = 3; + target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + // Test that no mismatches have been found. + std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_); + EXPECT_EQ((int)GetMismatchFrames(), 0); + } + + virtual void BasicRateTargetingSVC3TL1SLDropAllEnhFrameERTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 200, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + // Set error_resilience at frame level, with codec control, + // on/1 for enahancement layers and off/0 for base layer frames. 
+ set_frame_level_er_ = 1; + + // Drop TL1 and TL2: #frames(300) - #TL0. + drop_frames_ = 300 - 300 / 4; + int n = 0; + for (int i = 0; i < 300; i++) { + if (i % 4 != 0) { + drop_frames_list_[n] = i; + n++; + } + } + number_temporal_layers_ = 3; + target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + // Test that no mismatches have been found. + std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_); + EXPECT_EQ((int)GetMismatchFrames(), 0); + } + + virtual void BasicRateTargetingSVC3TL1SLDropSetEnhFrameERTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 200, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + // Set error_resilience at frame level, with codec control, + // on/1 for enahancement layers and off/0 for base layer frames. + set_frame_level_er_ = 1; + + // Drop TL1 and TL2: for part of sequence. Start at first TL2 at + // frame 101, and end at second T2 at frame 199. 
Frame 200 is TL0, + // so we can continue decoding without mismatch (since LAST is the + // only reference and error_resilient = 1 on TL1/TL2 frames). + int n = 0; + int num_nonref = 300 / 2; + for (int i = 101; i < 200; i++) { + if (i % 4 != 0) { + drop_frames_list_[n] = i; + n++; + if (i % 2 != 0) num_nonref -= 1; + } + } + drop_frames_ = n; + number_temporal_layers_ = 3; + target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + // Test that no mismatches have been found. + std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_); + EXPECT_EQ((int)GetMismatchFrames(), num_nonref); + } + + virtual void BasicRateTargetingSVC2TL1SLDropSetEnhER0Test() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 200, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + + // Set error_resilience off. + cfg_.g_error_resilient = 0; + + // Drop TL1: for part of sequence. Start at first TL1 at + // frame 101, and end at frame 199. 
Frame 200 is TL0, + // so we can continue decoding without mismatch (since LAST is the + // only reference). + int n = 0; + int num_nonref = 300 / 2; + for (int i = 101; i < 200; i++) { + if (i % 2 != 0) { + drop_frames_list_[n] = i; + n++; + if (i % 2 != 0) num_nonref -= 1; + } + } + drop_frames_ = n; + number_temporal_layers_ = 2; + target_layer_bitrate_[0] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + // Test that no mismatches have been found. + std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_); + EXPECT_EQ((int)GetMismatchFrames(), num_nonref); + } + + virtual void BasicRateTargetingSVC3TL1SLDropSetEnhER0Test() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 200, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + + // Set error_resilience off. + cfg_.g_error_resilient = 0; + + // Drop TL1 and TL2: for part of sequence. Start at first TL2 at + // frame 101, and end at second T2 at frame 199. Frame 200 is TL0, + // so we can continue decoding without mismatch (since LAST is the + // only reference). 
+ int n = 0; + int num_nonref = 300 / 2; + for (int i = 101; i < 200; i++) { + if (i % 4 != 0) { + drop_frames_list_[n] = i; + n++; + if (i % 2 != 0) num_nonref -= 1; + } + } + drop_frames_ = n; + number_temporal_layers_ = 3; + target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + // Test that no mismatches have been found. + std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_); + EXPECT_EQ((int)GetMismatchFrames(), num_nonref); + } + + virtual void BasicRateTargetingSVC3TL3SLDropSetEnhER0Test() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 200, 550 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + // Set error_resilience off. + cfg_.g_error_resilient = 0; + // Drop TL1 and TL2: for part of sequence. Start at first TL2 at + // frame 101, and end at second T2 at frame 199. Frame 200 is TL0, + // so we can continue decoding without mismatch (since LAST is the + // only reference). + // Drop here means drop whole superframe. 
+ int n = 0; + int num_nonref = 300 / 2; + for (int i = 101; i < 200; i++) { + if (i % 4 != 0) { + drop_frames_list_[n] = i; + n++; + if (i % 2 != 0) num_nonref -= 1; + } + } + number_temporal_layers_ = 3; + number_spatial_layers_ = 3; + multi_ref_ = 1; + drop_frames_ = n * number_spatial_layers_; + // SL0 + const int bitrate_sl0 = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[0] = 50 * bitrate_sl0 / 100; + target_layer_bitrate_[1] = 70 * bitrate_sl0 / 100; + target_layer_bitrate_[2] = bitrate_sl0; + // SL1 + const int bitrate_sl1 = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[3] = 50 * bitrate_sl1 / 100; + target_layer_bitrate_[4] = 70 * bitrate_sl1 / 100; + target_layer_bitrate_[5] = bitrate_sl1; + // SL2 + const int bitrate_sl2 = 4 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[6] = 50 * bitrate_sl2 / 100; + target_layer_bitrate_[7] = 70 * bitrate_sl2 / 100; + target_layer_bitrate_[8] = bitrate_sl2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + // Test that no mismatches have been found. 
+ std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(300 * number_spatial_layers_ - GetDecodedFrames(), drop_frames_); + EXPECT_EQ((int)GetMismatchFrames(), num_nonref); + } + + virtual void BasicRateTargetingSVC3TL1SLMultiRefCompoundTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, 400); + cfg_.g_w = 640; + cfg_.g_h = 480; + const int bitrate_array[2] = { 400, 800 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + multi_ref_ = 1; + comp_pred_ = 1; + number_temporal_layers_ = 3; + number_spatial_layers_ = 1; + target_layer_bitrate_[0] = 50 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[1] = 70 * cfg_.rc_target_bitrate / 100; + target_layer_bitrate_[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC1TL3SLDynEnablTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, + 1, 0, 400); + const int bitrate_array[2] = { 
500, 1000 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 1; + number_spatial_layers_ = 3; + target_layer_bitrate_[0] = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[1] = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[2] = 4 * cfg_.rc_target_bitrate / 8; + dynamic_enable_disable_mode_ = 1; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // No need to check RC on top layer which is disabled part of the time. + for (int i = 0; i < number_spatial_layers_ - 1; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingSVC1TL3SLDynDisEnablTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 500, 1000 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + number_temporal_layers_ = 1; + number_spatial_layers_ = 3; + target_layer_bitrate_[0] = 1 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[1] = 3 * cfg_.rc_target_bitrate / 8; + target_layer_bitrate_[2] = 4 * cfg_.rc_target_bitrate / 8; + dynamic_enable_disable_mode_ = 2; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + // No need to check RC on top layer which is disabled part of the time. 
+ for (int i = 0; i < number_spatial_layers_ - 1; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.38) + << " The datarate for the file is greater than target by too much!"; + } + } + + virtual void BasicRateTargetingRPS1TL1SLDropFramesTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = AOM_CBR; + cfg_.g_lag_in_frames = 0; + + ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 300); + const int bitrate_array[2] = { 100, 300 }; + cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)]; + ResetModel(); + rps_mode_ = 1; + rps_recovery_frame_ = 100; + cfg_.g_error_resilient = 0; + // Drop x frames before the recovery frames (where the reference + // is switched to an older reference (golden or altref). + // GOLDEN is 8 frames behind (for the rps pattern example) so we can't + // drop more than 8 frames recovery frame, so choose x = 7. + int n = 0; + for (int i = rps_recovery_frame_ - 7; i < rps_recovery_frame_; i++) { + drop_frames_list_[n] = i; + n++; + } + drop_frames_ = n; + number_spatial_layers_ = 1; + number_temporal_layers_ = 1; + target_layer_bitrate_[0] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) { + ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.60) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.60) + << " The datarate for the file is greater than target by too much!"; + } + // Test that no mismatches have been found. 
+ std::cout << " Decoded frames: " << GetDecodedFrames() << "\n"; + std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; + EXPECT_EQ(300 - GetDecodedFrames(), drop_frames_); + EXPECT_EQ((int)GetMismatchFrames(), 0); + } + + int layer_frame_cnt_; + int superframe_cnt_; + int number_temporal_layers_; + int number_spatial_layers_; + // Allow for up to 3 temporal layers. + int target_layer_bitrate_[AOM_MAX_LAYERS]; + aom_svc_params_t svc_params_; + aom_svc_ref_frame_config_t ref_frame_config_; + aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred_; + aom_svc_layer_id_t layer_id_; + double effective_datarate_tl[AOM_MAX_LAYERS]; + unsigned int drop_frames_; + unsigned int drop_frames_list_[1000]; + unsigned int mismatch_nframes_; + unsigned int decoded_nframes_; + double mismatch_psnr_; + int set_frame_level_er_; + int multi_ref_; + int use_fixed_mode_svc_; + int comp_pred_; + int dynamic_enable_disable_mode_; + int intra_only_; + unsigned int frame_to_start_decoding_; + unsigned int layer_to_decode_; + unsigned int frame_sync_; + unsigned int current_video_frame_; + int screen_mode_; + int rps_mode_; + int rps_recovery_frame_; + int simulcast_mode_; + + int user_define_frame_qp_; + int frame_qp_; + int total_frame_; + bool set_speed_per_layer_; + libaom_test::ACMRandom rnd_; +}; + +// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SL) { + BasicRateTargetingSVC3TL1SLTest(); +} + +TEST_P(DatarateTestSVC, SetFrameQpSVC3TL1SL) { SetFrameQpSVC3TL1SLTest(); } + +TEST_P(DatarateTestSVC, SetFrameQpSVC3TL3SL) { SetFrameQpSVC3TL3SLTest(); } + +// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial +// for screen mode. 
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLScreen) { + BasicRateTargetingSVC3TL1SLScreenTest(); +} + +// Check basic rate targeting for CBR, for 2 temporal layers, 1 spatial +// for screen mode, with frame dropper on at low bitrates. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC2TL1SLScreenDropFrame) { + BasicRateTargetingSVC2TL1SLScreenDropFrameTest(); +} +// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal +// for screen mode. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLScreen) { + BasicRateTargetingSVC1TL3SLScreenTest(); +} + +// Check basic rate targeting for CBR, for 1 temporal layer, 1 spatial +// for screen mode, with source with many scene cuts and motion. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL1SLScreenScCutsMotion) { + BasicRateTargetingSVC1TL1SLScreenScCutsMotionTest(); +} + +// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial, +// with dynamic resize on. Encode at very low bitrate and check that +// there is at least one resize (down) event. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLResize) { + BasicRateTargetingSVC3TL1SLResizeTest(); +} + +// Check basic rate targeting for CBR, for 2 spatial layers, 1 temporal. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL2SL) { + BasicRateTargetingSVC1TL2SLTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial layers, 3 temporal, +// with Intra-only frame inserted in the stream. Verify that we can start +// decoding the SL0 stream at the intra_only frame in mid-sequence. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLIntraStartDecodeBaseMidSeq) { + BasicRateTargetingSVC3TL3SLIntraStartDecodeBaseMidSeq(); +} + +// Check basic rate targeting for CBR, for 3 spatial layers, 3 temporal, +// with Intra-only frame inserted in the stream. Verify that we can +// decode all frames and layers with no mismatch. 
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLIntraMidSeqDecodeAll) { + BasicRateTargetingSVC3TL3SLIntraMidSeqDecodeAll(); +} + +// Check simulcast mode for 3 spatial layers, 3 temporal. +// A key frame is inserted on base SL0 in mid-stream; verify that the +// top spatial layer (SL2) can be decoded, starting with an Intra-only frame. +// Verify that we can decode all frames for SL2 with no mismatch. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLSimulcast) { + BasicRateTargetingSVC3TL3SLSimulcast(); +} + +// Check basic rate targeting for CBR, for 2 spatial layers, 1 temporal, +// with Intra-only frame inserted in the stream. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL2SLIntraOnly) { + BasicRateTargetingSVC1TL2SLIntraOnlyTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SL) { + BasicRateTargetingSVC1TL3SLTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal, +// with additional temporal reference for top spatial layer. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLMultiRef) { + BasicRateTargetingSVC1TL3SLMultiRefTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SL) { + BasicRateTargetingSVC3TL3SLTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLHD) { + BasicRateTargetingSVC3TL3SLHDTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers, +// for fixed mode SVC. +TEST_P(DatarateTestSVC, BasicRateTargetingFixedModeSVC3TL3SLHD) { + BasicRateTargetingFixedModeSVC3TL3SLHDTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers, +// for 2 threads, 2 tile_columns, row-mt enabled, and different speed +// per layer. 
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLMultiThreadSpeedPerLayer) { + BasicRateTargetingSVC3TL3SLMultiThreadSpeedPerLayerTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers, +// for 2 threads, 2 tile_columns, row-mt enabled. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLHDMultiThread2) { + BasicRateTargetingSVC3TL3SLHDMultiThread2Test(); +} +// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers, +// for 4 threads, 4 tile_columns, row-mt enabled. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLHDMultiThread4) { + BasicRateTargetingSVC3TL3SLHDMultiThread4Test(); +} + +// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers, +// with additional temporal reference for top spatial layer. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLHDMultiRef) { + BasicRateTargetingSVC3TL3SLHDMultiRefTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers, +// for auto key frame mode with short key frame period. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLKf) { + BasicRateTargetingSVC3TL3SLKfTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers, +// for 4:4:4 input. The test is registered under a DISABLED_ name when +// CONFIG_MAX_DECODE_PROFILE is defined and less than 1. +#if defined(CONFIG_MAX_DECODE_PROFILE) && CONFIG_MAX_DECODE_PROFILE < 1 +TEST_P(DatarateTestSVC, DISABLED_BasicRateTargeting444SVC3TL3SL) { +#else +TEST_P(DatarateTestSVC, BasicRateTargeting444SVC3TL3SL) { +#endif + BasicRateTargeting444SVC3TL3SLTest(); +} + +// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer, +// with dropping of all enhancement layers (TL 1 and TL2). Check that the base +// layer (TL0) is still decodable (with no mismatch) with the +// error_resilient flag set to 0. This test uses the pattern with multiple +// references (last, golden, and altref), updated on base layer. 
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLMultiRefDropAllEnh) { + BasicRateTargetingSVC3TL1SLMultiRefDropAllEnhTest(); +} + +// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer, +// with dropping of all enhancement layers (TL 1 and TL2). Check that the base +// layer (TL0) is still decodable (with no mismatch) with the +// error_resilient flag set to 0. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLDropAllEnh) { + BasicRateTargetingSVC3TL1SLDropAllEnhTest(); +} + +// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer, +// with dropping of the TL2 enhancement layer, whose frames are non-reference +// (droppable) frames. For the base layer (TL0) and TL1 to still be decodable +// (with no mismatch), the error_resilient_flag may be off (set to 0), +// since TL2 are non-reference frames. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLDropTL2Enh) { + BasicRateTargetingSVC3TL1SLDropTL2EnhTest(); +} + +// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer, +// with dropping of all enhancement layers (TL 1 and TL2). Test that the +// error_resilient flag can be set at frame level, with on/1 on +// enhancement layers and off/0 on base layer. +// This allows for successful decoding after dropping enhancement layer frames. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLDropAllEnhFrameER) { + BasicRateTargetingSVC3TL1SLDropAllEnhFrameERTest(); +} + +// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer, +// with dropping a set of enhancement layers (TL 1 and TL2) in middle of sequence. +// Test that the error_resilient flag can be set at frame level, with on/1 on +// enhancement layers and off/0 on base layer. +// This allows for successful decoding after dropping a set of enhancement layer +// frames in the sequence. 
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLDropSetEnhFrameER) { + BasicRateTargetingSVC3TL1SLDropSetEnhFrameERTest(); +} + +// Check basic rate targeting for CBR, for 2 temporal layers, 1 spatial layer, +// with dropping a set of enhancement layers (TL 1) in middle of sequence. +// Test that the error_resilient flag can be 0/off for all frames. +// This allows for successful decoding after dropping a set of enhancement layer +// frames in the sequence. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC2TL1SLDropSetEnhER0) { + BasicRateTargetingSVC2TL1SLDropSetEnhER0Test(); +} + +// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer, +// with dropping a set of enhancement layers (TL 1 and TL2) in middle of sequence. +// Test that the error_resilient flag can be 0/off for all frames. +// This allows for successful decoding after dropping a set of enhancement layer +// frames in the sequence. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLDropSetEnhER0) { + BasicRateTargetingSVC3TL1SLDropSetEnhER0Test(); +} + +// Check basic rate targeting for CBR, for 3 temporal layers, 3 spatial layers, +// with dropping a set of enhancement layers (superframe TL 1 and TL2) in middle +// of sequence. Test that the error_resilient flag can be 0/off for all frames. +// This allows for successful decoding after dropping a set of enhancement layer +// frames in the sequence. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLDropSetEnhER0) { + BasicRateTargetingSVC3TL3SLDropSetEnhER0Test(); +} + +// Check basic rate targeting for CBR, for 3 temporal layers, 1 spatial layer, +// with compound prediction on, for pattern with two additional references +// (golden and altref), both updated on base TL0 frames. 
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL1SLMultiRefCompound) { + BasicRateTargetingSVC3TL1SLMultiRefCompoundTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal, +// with the top spatial layer starting disabled (0 bitrate) and then +// dynamically enabled after x frames with nonzero bitrate. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLDynEnabl) { + BasicRateTargetingSVC1TL3SLDynEnablTest(); +} + +// Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal, +// with the top spatial layer dynamically disabled and enabled during the +// middle of the sequence. +TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLDynDisEnabl) { + BasicRateTargetingSVC1TL3SLDynDisEnablTest(); +} + +// Check basic rate targeting and encoder/decoder mismatch, for RPS +// with 1 layer. A number of consecutive frames are lost midway in +// sequence, and encoder resorts to a longer term reference to recover +// and continue decoding successfully. +TEST_P(DatarateTestSVC, BasicRateTargetingRPS1TL1SLDropFrames) { + BasicRateTargetingRPS1TL1SLDropFramesTest(); +} + +TEST(SvcParams, BitrateOverflow) { + uint8_t buf[6] = { 0 }; + aom_image_t img; + aom_codec_ctx_t enc; + aom_codec_enc_cfg_t cfg; + + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, 1, 1, 1, buf)); + + aom_codec_iface_t *const iface = aom_codec_av1_cx(); + EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_REALTIME), + AOM_CODEC_OK); + cfg.g_w = 1; + cfg.g_h = 1; + ASSERT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK); + + aom_svc_params_t svc_params = {}; + svc_params.framerate_factor[0] = 1; + svc_params.framerate_factor[1] = 2; + svc_params.number_spatial_layers = 1; + svc_params.number_temporal_layers = 2; + svc_params.layer_target_bitrate[0] = INT_MAX; + svc_params.layer_target_bitrate[1] = INT_MAX; + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_SVC_PARAMS, &svc_params), + AOM_CODEC_OK); + EXPECT_EQ( + aom_codec_encode(&enc, &img, /*pts=*/0, 
/*duration=*/1, /*flags=*/0), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_encode(&enc, /*img=*/nullptr, /*pts=*/0, /*duration=*/0, + /*flags=*/0), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} + +AV1_INSTANTIATE_TEST_SUITE(DatarateTestSVC, + ::testing::Values(::libaom_test::kRealTime), + ::testing::Range(7, 12), ::testing::Values(0, 3), + ::testing::Values(0, 1)); + +} // namespace +} // namespace datarate_test diff --git a/third_party/aom/test/svc_encoder_rtc.sh b/third_party/aom/test/svc_encoder_rtc.sh new file mode 100644 index 0000000000..735166d6f6 --- /dev/null +++ b/third_party/aom/test/svc_encoder_rtc.sh @@ -0,0 +1,85 @@ +#!/bin/sh +## Copyright (c) 2023, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## + +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required. +svc_encoder_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH." + return 1 + fi +} + +common_flags="-k 10000" +common_flags="${common_flags} --max-q=63" +common_flags="${common_flags} --error-resilient=0" + +# Runs svc_encoder_rtc using with 1 spatial layer 3 temporal layers. +svc_encoder_s1_t3() { + local encoder="${LIBAOM_BIN_PATH}/svc_encoder_rtc${AOM_TEST_EXE_SUFFIX}" + local output_file="${AOM_TEST_OUTPUT_DIR}/svc_encoder_rtc" + + if [ ! -x "${encoder}" ]; then + elog "${encoder} does not exist or is not executable." 
+ return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${encoder}" "${common_flags}" \ + "--width=${YUV_RAW_INPUT_WIDTH}" \ + "--height=${YUV_RAW_INPUT_HEIGHT}" \ + "-lm 2" \ + "--speed=8" \ + "--target-bitrate=400" \ + "--bitrates=220,300,400" \ + "--spatial-layers=1" \ + "--temporal-layers=3" \ + "--timebase=1/30" \ + "${YUV_RAW_INPUT}" \ + "-o ${output_file}" \ + ${devnull} || return 1 + + [ -e "${output_file}" ] || return 1 +} + +# Runs svc_encoder_rtc using with 1 spatial layer 2 temporal layers with +# speed 10. +svc_encoder_s1_t2() { + local encoder="${LIBAOM_BIN_PATH}/svc_encoder_rtc${AOM_TEST_EXE_SUFFIX}" + local output_file="${AOM_TEST_OUTPUT_DIR}/svc_encoder_rtc" + + if [ ! -x "${encoder}" ]; then + elog "${encoder} does not exist or is not executable." + return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${encoder}" "${common_flags}" \ + "--width=${YUV_RAW_INPUT_WIDTH}" \ + "--height=${YUV_RAW_INPUT_HEIGHT}" \ + "-lm 1" \ + "--speed=10" \ + "--target-bitrate=400" \ + "--bitrates=220,400" \ + "--spatial-layers=1" \ + "--temporal-layers=2" \ + "--timebase=1/30" \ + "${YUV_RAW_INPUT}" \ + "-o ${output_file}" \ + ${devnull} || return 1 + + [ -e "${output_file}" ] || return 1 +} + +if [ "$(av1_encode_available)" = "yes" ]; then + svc_encoder_rtc_tests="svc_encoder_s1_t3 + svc_encoder_s1_t2" + run_tests svc_encoder_verify_environment "${svc_encoder_rtc_tests}" +fi diff --git a/third_party/aom/test/temporal_filter_test.cc b/third_party/aom/test/temporal_filter_test.cc new file mode 100644 index 0000000000..85f68b817e --- /dev/null +++ b/third_party/aom/test/temporal_filter_test.cc @@ -0,0 +1,788 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <cmath> +#include <cstdlib> +#include <memory> +#include <new> +#include <string> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" +#include "config/av1_rtcd.h" + +#include "aom_ports/mem.h" +#include "av1/encoder/encoder.h" +#include "av1/encoder/temporal_filter.h" +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "test/function_equivalence_test.h" + +using libaom_test::ACMRandom; +using ::testing::Combine; +using ::testing::Values; +using ::testing::ValuesIn; + +#if !CONFIG_REALTIME_ONLY +namespace { +typedef enum { + I400, // Monochrome + I420, // 4:2:0 + I422, // 4:2:2 + I444, // 4:4:4 +} ColorFormat; +static const char *color_fmt_str[] = { "I400", "I420", "I422", "I444" }; +typedef void (*TemporalFilterFunc)( + const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd, + const BLOCK_SIZE block_size, const int mb_row, const int mb_col, + const int num_planes, const double *noise_level, const MV *subblock_mvs, + const int *subblock_mses, const int q_factor, const int filter_strength, + int tf_wgt_calc_lvl, const uint8_t *pred, uint32_t *accum, uint16_t *count); +typedef libaom_test::FuncParam<TemporalFilterFunc> TemporalFilterFuncParam; + +typedef std::tuple<TemporalFilterFuncParam, int> TemporalFilterWithParam; + +class TemporalFilterTest + : public ::testing::TestWithParam<TemporalFilterWithParam> { + public: + ~TemporalFilterTest() override = default; + void SetUp() override { + params_ = GET_PARAM(0); + tf_wgt_calc_lvl_ = GET_PARAM(1); + rnd_.Reset(ACMRandom::DeterministicSeed()); + src1_ = reinterpret_cast<uint8_t *>( + aom_memalign(8, sizeof(uint8_t) * MAX_MB_PLANE * BH * BW)); + src2_ = 
reinterpret_cast<uint8_t *>( + aom_memalign(8, sizeof(uint8_t) * MAX_MB_PLANE * BH * BW)); + + ASSERT_NE(src1_, nullptr); + ASSERT_NE(src2_, nullptr); + } + + void TearDown() override { + aom_free(src1_); + aom_free(src2_); + } + void RunTest(int isRandom, int run_times, ColorFormat color_fmt); + + void GenRandomData(int width, int height, int stride, int stride2, + int num_planes, int subsampling_x, int subsampling_y) { + uint8_t *src1p = src1_; + uint8_t *src2p = src2_; + for (int plane = 0; plane < num_planes; plane++) { + int plane_w = plane ? width >> subsampling_x : width; + int plane_h = plane ? height >> subsampling_y : height; + int plane_stride = plane ? stride >> subsampling_x : stride; + int plane_stride2 = plane ? stride2 >> subsampling_x : stride2; + for (int ii = 0; ii < plane_h; ii++) { + for (int jj = 0; jj < plane_w; jj++) { + src1p[jj] = rnd_.Rand8(); + src2p[jj] = rnd_.Rand8(); + } + src1p += plane_stride; + src2p += plane_stride2; + } + } + } + + void GenExtremeData(int width, int height, int stride, int stride2, + int num_planes, int subsampling_x, int subsampling_y, + uint8_t val) { + uint8_t *src1p = src1_; + uint8_t *src2p = src2_; + for (int plane = 0; plane < num_planes; plane++) { + int plane_w = plane ? width >> subsampling_x : width; + int plane_h = plane ? height >> subsampling_y : height; + int plane_stride = plane ? stride >> subsampling_x : stride; + int plane_stride2 = plane ? 
stride2 >> subsampling_x : stride2; + for (int ii = 0; ii < plane_h; ii++) { + for (int jj = 0; jj < plane_w; jj++) { + src1p[jj] = val; + src2p[jj] = (255 - val); + } + src1p += plane_stride; + src2p += plane_stride2; + } + } + } + + protected: + TemporalFilterFuncParam params_; + int32_t tf_wgt_calc_lvl_; + uint8_t *src1_; + uint8_t *src2_; + ACMRandom rnd_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(TemporalFilterTest); + +void TemporalFilterTest::RunTest(int isRandom, int run_times, + ColorFormat color_fmt) { + aom_usec_timer ref_timer, test_timer; + const BLOCK_SIZE block_size = TF_BLOCK_SIZE; + static_assert(block_size == BLOCK_32X32, ""); + const int width = 32; + const int height = 32; + int num_planes = MAX_MB_PLANE; + int subsampling_x = 0; + int subsampling_y = 0; + if (color_fmt == I420) { + subsampling_x = 1; + subsampling_y = 1; + } else if (color_fmt == I422) { + subsampling_x = 1; + subsampling_y = 0; + } else if (color_fmt == I400) { + num_planes = 1; + } + for (int k = 0; k < 3; k++) { + const int stride = width; + const int stride2 = width; + if (isRandom) { + GenRandomData(width, height, stride, stride2, num_planes, subsampling_x, + subsampling_y); + } else { + const int msb = 8; // Up to 8 bit input + const int limit = (1 << msb) - 1; + if (k == 0) { + GenExtremeData(width, height, stride, stride2, num_planes, + subsampling_x, subsampling_y, limit); + } else { + GenExtremeData(width, height, stride, stride2, num_planes, + subsampling_x, subsampling_y, 0); + } + } + double sigma[MAX_MB_PLANE] = { 2.1002103677063437, 2.1002103677063437, + 2.1002103677063437 }; + DECLARE_ALIGNED(16, unsigned int, accumulator_ref[1024 * 3]); + DECLARE_ALIGNED(16, uint16_t, count_ref[1024 * 3]); + memset(accumulator_ref, 0, 1024 * 3 * sizeof(accumulator_ref[0])); + memset(count_ref, 0, 1024 * 3 * sizeof(count_ref[0])); + DECLARE_ALIGNED(16, unsigned int, accumulator_mod[1024 * 3]); + DECLARE_ALIGNED(16, uint16_t, count_mod[1024 * 3]); + 
memset(accumulator_mod, 0, 1024 * 3 * sizeof(accumulator_mod[0])); + memset(count_mod, 0, 1024 * 3 * sizeof(count_mod[0])); + + static_assert(width == 32 && height == 32, ""); + const MV subblock_mvs[4] = { { 0, 0 }, { 5, 5 }, { 7, 8 }, { 2, 10 } }; + const int subblock_mses[4] = { 15, 16, 17, 18 }; + const int q_factor = 12; + const int filter_strength = 5; + const int mb_row = 0; + const int mb_col = 0; + std::unique_ptr<YV12_BUFFER_CONFIG> frame_to_filter(new (std::nothrow) + YV12_BUFFER_CONFIG); + ASSERT_NE(frame_to_filter, nullptr); + frame_to_filter->y_crop_height = 360; + frame_to_filter->y_crop_width = 540; + frame_to_filter->heights[PLANE_TYPE_Y] = height; + frame_to_filter->heights[PLANE_TYPE_UV] = height >> subsampling_y; + frame_to_filter->strides[PLANE_TYPE_Y] = stride; + frame_to_filter->strides[PLANE_TYPE_UV] = stride >> subsampling_x; + DECLARE_ALIGNED(16, uint8_t, src[1024 * 3]); + frame_to_filter->buffer_alloc = src; + frame_to_filter->flags = 0; // Only support low bit-depth test. + memcpy(src, src1_, 1024 * 3 * sizeof(uint8_t)); + + std::unique_ptr<MACROBLOCKD> mbd(new (std::nothrow) MACROBLOCKD); + ASSERT_NE(mbd, nullptr); + mbd->bd = 8; + for (int plane = AOM_PLANE_Y; plane < num_planes; plane++) { + int plane_height = plane ? height >> subsampling_y : height; + int plane_stride = plane ? stride >> subsampling_x : stride; + frame_to_filter->buffers[plane] = + frame_to_filter->buffer_alloc + plane * plane_stride * plane_height; + mbd->plane[plane].subsampling_x = plane ? subsampling_x : 0; + mbd->plane[plane].subsampling_y = plane ? 
subsampling_y : 0; + } + + params_.ref_func(frame_to_filter.get(), mbd.get(), block_size, mb_row, + mb_col, num_planes, sigma, subblock_mvs, subblock_mses, + q_factor, filter_strength, tf_wgt_calc_lvl_, src2_, + accumulator_ref, count_ref); + params_.tst_func(frame_to_filter.get(), mbd.get(), block_size, mb_row, + mb_col, num_planes, sigma, subblock_mvs, subblock_mses, + q_factor, filter_strength, tf_wgt_calc_lvl_, src2_, + accumulator_mod, count_mod); + + if (run_times > 1) { + aom_usec_timer_start(&ref_timer); + for (int j = 0; j < run_times; j++) { + params_.ref_func(frame_to_filter.get(), mbd.get(), block_size, mb_row, + mb_col, num_planes, sigma, subblock_mvs, subblock_mses, + q_factor, filter_strength, tf_wgt_calc_lvl_, src2_, + accumulator_ref, count_ref); + } + aom_usec_timer_mark(&ref_timer); + const int elapsed_time_c = + static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int j = 0; j < run_times; j++) { + params_.tst_func(frame_to_filter.get(), mbd.get(), block_size, mb_row, + mb_col, num_planes, sigma, subblock_mvs, subblock_mses, + q_factor, filter_strength, tf_wgt_calc_lvl_, src2_, + accumulator_mod, count_mod); + } + aom_usec_timer_mark(&test_timer); + const int elapsed_time_simd = + static_cast<int>(aom_usec_timer_elapsed(&test_timer)); + + printf( + "c_time=%d \t simd_time=%d \t " + "gain=%f\t width=%d\t height=%d\t color_format=%s\n", + elapsed_time_c, elapsed_time_simd, + (float)((float)elapsed_time_c / (float)elapsed_time_simd), width, + height, color_fmt_str[color_fmt]); + + } else { + for (int i = 0, l = 0; i < height; i++) { + for (int j = 0; j < width; j++, l++) { + EXPECT_EQ(accumulator_ref[l], accumulator_mod[l]) + << "Error:" << k << " SSE Sum Test [" << width << "x" << height + << "] " << color_fmt_str[color_fmt] + << " C accumulator does not match optimized accumulator."; + EXPECT_EQ(count_ref[l], count_mod[l]) + << "Error:" << k << " SSE Sum Test [" << width << "x" << height + << "] " << 
color_fmt_str[color_fmt] + << " count does not match optimized count."; + } + } + } + } +} + +TEST_P(TemporalFilterTest, OperationCheck) { + RunTest(1, 1, I400); + RunTest(1, 1, I420); + RunTest(1, 1, I422); + RunTest(1, 1, I444); +} + +TEST_P(TemporalFilterTest, ExtremeValues) { + RunTest(0, 1, I400); + RunTest(0, 1, I420); + RunTest(0, 1, I422); + RunTest(0, 1, I444); +} + +TEST_P(TemporalFilterTest, DISABLED_Speed) { + RunTest(1, 100000, I400); + RunTest(1, 100000, I420); + RunTest(1, 100000, I422); + RunTest(1, 100000, I444); +} + +#if HAVE_AVX2 +TemporalFilterFuncParam temporal_filter_test_avx2[] = { TemporalFilterFuncParam( + &av1_apply_temporal_filter_c, &av1_apply_temporal_filter_avx2) }; +INSTANTIATE_TEST_SUITE_P(AVX2, TemporalFilterTest, + Combine(ValuesIn(temporal_filter_test_avx2), + Values(0, 1))); +#endif // HAVE_AVX2 + +#if HAVE_SSE2 +TemporalFilterFuncParam temporal_filter_test_sse2[] = { TemporalFilterFuncParam( + &av1_apply_temporal_filter_c, &av1_apply_temporal_filter_sse2) }; +INSTANTIATE_TEST_SUITE_P(SSE2, TemporalFilterTest, + Combine(ValuesIn(temporal_filter_test_sse2), + Values(0, 1))); +#endif // HAVE_SSE2 + +#if HAVE_NEON +TemporalFilterFuncParam temporal_filter_test_neon[] = { TemporalFilterFuncParam( + &av1_apply_temporal_filter_c, &av1_apply_temporal_filter_neon) }; +INSTANTIATE_TEST_SUITE_P(NEON, TemporalFilterTest, + Combine(ValuesIn(temporal_filter_test_neon), + Values(0, 1))); +#endif // HAVE_NEON + +#if HAVE_NEON_DOTPROD +TemporalFilterFuncParam temporal_filter_test_neon_dotprod[] = { + TemporalFilterFuncParam(&av1_apply_temporal_filter_c, + &av1_apply_temporal_filter_neon_dotprod) +}; +INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, TemporalFilterTest, + Combine(ValuesIn(temporal_filter_test_neon_dotprod), + Values(0, 1))); +#endif // HAVE_NEON_DOTPROD + +#if HAVE_AVX2 || HAVE_NEON +// Width and height for which av1_estimate_noise_from_single_plane() will be +// tested. 
+const int kWidths[] = { 3840, 1920, 1280, 800, 640, 360, 357 }; +const int kHeights[] = { 2160, 1080, 720, 600, 480, 240, 237 }; +#endif // HAVE_AVX2 || HAVE_NEON + +typedef double (*EstimateNoiseFunc)(const uint8_t *src, int height, int width, + int stride, int edge_thresh); + +typedef std::tuple<EstimateNoiseFunc, EstimateNoiseFunc, int, int> + EstimateNoiseWithParam; + +// Fixture that runs two noise estimators (tuple params 0 and 1) on the same +// random 8-bit buffer, sized by the width and height in tuple params 2 and 3. +class EstimateNoiseTest + : public ::testing::TestWithParam<EstimateNoiseWithParam> { + public: + ~EstimateNoiseTest() override = default; + void SetUp() override { + ref_func = GET_PARAM(0); + tst_func = GET_PARAM(1); + width_ = GET_PARAM(2); + height_ = GET_PARAM(3); + rnd_.Reset(ACMRandom::DeterministicSeed()); + src1_ = reinterpret_cast<uint8_t *>( + aom_memalign(8, sizeof(uint8_t) * width_ * height_)); + // Check the allocation before GenRandomData() writes through src1_, so a + // failed allocation is reported instead of being dereferenced. + ASSERT_NE(src1_, nullptr); + GenRandomData(width_ * height_); + } + + void TearDown() override { aom_free(src1_); } + + // Calls both functions run_times times with stride equal to width and + // expects their results to compare equal. 
+ void RunTest(int run_times) { + stride_ = width_; + + for (int i = 0; i < run_times; i++) { + double ref_out = ref_func(src1_, height_, width_, stride_, + NOISE_ESTIMATION_EDGE_THRESHOLD); + + double tst_out = tst_func(src1_, height_, width_, stride_, + NOISE_ESTIMATION_EDGE_THRESHOLD); + + EXPECT_EQ(ref_out, tst_out); + } + } + + // Times run_times calls of each function and prints the ratio + // ref_func time / tst_func time. + void SpeedTest(int run_times) { + stride_ = width_; + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; i++) { + ref_func(src1_, height_, width_, stride_, + NOISE_ESTIMATION_EDGE_THRESHOLD); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; i++) { + tst_func(src1_, height_, width_, stride_, + NOISE_ESTIMATION_EDGE_THRESHOLD); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf("(%3.2f)\n", time1 / time2); + } + + // Fills the first size bytes of src1_ with random 8-bit values. 
+ void GenRandomData(int size) { + for (int ii = 0; ii < size; ii++) src1_[ii] = rnd_.Rand8(); + } + + 
protected: + EstimateNoiseFunc ref_func; + EstimateNoiseFunc tst_func; + ACMRandom rnd_; + uint8_t *src1_; + int width_; + int height_; + int stride_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(EstimateNoiseTest); + +TEST_P(EstimateNoiseTest, RandomValues) { RunTest(1); } + +TEST_P(EstimateNoiseTest, DISABLED_Speed) { SpeedTest(2000); } + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, EstimateNoiseTest, + ::testing::Combine( + ::testing::Values(av1_estimate_noise_from_single_plane_c), + ::testing::Values(av1_estimate_noise_from_single_plane_avx2), + ::testing::ValuesIn(kWidths), ::testing::ValuesIn(kHeights))); +#endif // HAVE_AVX2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, EstimateNoiseTest, + ::testing::Combine( + ::testing::Values(av1_estimate_noise_from_single_plane_c), + ::testing::Values(av1_estimate_noise_from_single_plane_neon), + ::testing::ValuesIn(kWidths), ::testing::ValuesIn(kHeights))); +#endif // HAVE_NEON + +#if CONFIG_AV1_HIGHBITDEPTH + +typedef void (*HBDTemporalFilterFunc)( + const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd, + const BLOCK_SIZE block_size, const int mb_row, const int mb_col, + const int num_planes, const double *noise_level, const MV *subblock_mvs, + const int *subblock_mses, const int q_factor, const int filter_strength, + int tf_wgt_calc_lvl, const uint8_t *pred, uint32_t *accum, uint16_t *count); +typedef libaom_test::FuncParam<HBDTemporalFilterFunc> + HBDTemporalFilterFuncParam; + +typedef std::tuple<HBDTemporalFilterFuncParam, int> HBDTemporalFilterWithParam; + +class HBDTemporalFilterTest + : public ::testing::TestWithParam<HBDTemporalFilterWithParam> { + public: + ~HBDTemporalFilterTest() override = default; + void SetUp() override { + params_ = GET_PARAM(0); + tf_wgt_calc_lvl_ = GET_PARAM(1); + rnd_.Reset(ACMRandom::DeterministicSeed()); + src1_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * MAX_MB_PLANE * BH * BW)); + src2_ = reinterpret_cast<uint16_t *>( + 
aom_memalign(16, sizeof(uint16_t) * MAX_MB_PLANE * BH * BW)); + + ASSERT_NE(src1_, nullptr); + ASSERT_NE(src2_, nullptr); + } + + void TearDown() override { + aom_free(src1_); + aom_free(src2_); + } + void RunTest(int isRandom, int run_times, int bd, ColorFormat color_fmt); + + void GenRandomData(int width, int height, int stride, int stride2, int bd, + int subsampling_x, int subsampling_y, int num_planes) { + uint16_t *src1p = src1_; + uint16_t *src2p = src2_; + for (int plane = AOM_PLANE_Y; plane < num_planes; plane++) { + int plane_w = plane ? width >> subsampling_x : width; + int plane_h = plane ? height >> subsampling_y : height; + int plane_stride = plane ? stride >> subsampling_x : stride; + int plane_stride2 = plane ? stride2 >> subsampling_x : stride2; + const uint16_t max_val = (1 << bd) - 1; + for (int ii = 0; ii < plane_h; ii++) { + for (int jj = 0; jj < plane_w; jj++) { + src1p[jj] = rnd_.Rand16() & max_val; + src2p[jj] = rnd_.Rand16() & max_val; + } + src1p += plane_stride; + src2p += plane_stride2; + } + } + } + + void GenExtremeData(int width, int height, int stride, int stride2, int bd, + int subsampling_x, int subsampling_y, int num_planes, + uint16_t val) { + uint16_t *src1p = src1_; + uint16_t *src2p = src2_; + for (int plane = AOM_PLANE_Y; plane < num_planes; plane++) { + int plane_w = plane ? width >> subsampling_x : width; + int plane_h = plane ? height >> subsampling_y : height; + int plane_stride = plane ? stride >> subsampling_x : stride; + int plane_stride2 = plane ? 
stride2 >> subsampling_x : stride2; + uint16_t max_val = (1 << bd) - 1; + for (int ii = 0; ii < plane_h; ii++) { + for (int jj = 0; jj < plane_w; jj++) { + src1p[jj] = val; + src2p[jj] = (max_val - val); + } + src1p += plane_stride; + src2p += plane_stride2; + } + } + } + + protected: + HBDTemporalFilterFuncParam params_; + int tf_wgt_calc_lvl_; + uint16_t *src1_; + uint16_t *src2_; + ACMRandom rnd_; +}; + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HBDTemporalFilterTest); + +void HBDTemporalFilterTest::RunTest(int isRandom, int run_times, int BD, + ColorFormat color_fmt) { + aom_usec_timer ref_timer, test_timer; + const BLOCK_SIZE block_size = TF_BLOCK_SIZE; + static_assert(block_size == BLOCK_32X32, ""); + const int width = 32; + const int height = 32; + int num_planes = MAX_MB_PLANE; + int subsampling_x = 0; + int subsampling_y = 0; + if (color_fmt == I420) { + subsampling_x = 1; + subsampling_y = 1; + } else if (color_fmt == I422) { + subsampling_x = 1; + subsampling_y = 0; + } else if (color_fmt == I400) { + num_planes = 1; + } + for (int k = 0; k < 3; k++) { + const int stride = width; + const int stride2 = width; + if (isRandom) { + GenRandomData(width, height, stride, stride2, BD, subsampling_x, + subsampling_y, num_planes); + } else { + const int msb = BD; + const uint16_t limit = (1 << msb) - 1; + if (k == 0) { + GenExtremeData(width, height, stride, stride2, BD, subsampling_x, + subsampling_y, num_planes, limit); + } else { + GenExtremeData(width, height, stride, stride2, BD, subsampling_x, + subsampling_y, num_planes, 0); + } + } + double sigma[MAX_MB_PLANE] = { 2.1002103677063437, 2.1002103677063437, + 2.1002103677063437 }; + DECLARE_ALIGNED(16, unsigned int, accumulator_ref[1024 * 3]); + DECLARE_ALIGNED(16, uint16_t, count_ref[1024 * 3]); + memset(accumulator_ref, 0, 1024 * 3 * sizeof(accumulator_ref[0])); + memset(count_ref, 0, 1024 * 3 * sizeof(count_ref[0])); + DECLARE_ALIGNED(16, unsigned int, accumulator_mod[1024 * 3]); + DECLARE_ALIGNED(16, 
uint16_t, count_mod[1024 * 3]); + memset(accumulator_mod, 0, 1024 * 3 * sizeof(accumulator_mod[0])); + memset(count_mod, 0, 1024 * 3 * sizeof(count_mod[0])); + + static_assert(width == 32 && height == 32, ""); + const MV subblock_mvs[4] = { { 0, 0 }, { 5, 5 }, { 7, 8 }, { 2, 10 } }; + const int subblock_mses[4] = { 15, 16, 17, 18 }; + const int q_factor = 12; + const int filter_strength = 5; + const int mb_row = 0; + const int mb_col = 0; + std::unique_ptr<YV12_BUFFER_CONFIG> frame_to_filter(new (std::nothrow) + YV12_BUFFER_CONFIG); + ASSERT_NE(frame_to_filter, nullptr); + frame_to_filter->y_crop_height = 360; + frame_to_filter->y_crop_width = 540; + frame_to_filter->heights[PLANE_TYPE_Y] = height; + frame_to_filter->heights[PLANE_TYPE_UV] = height >> subsampling_y; + frame_to_filter->strides[PLANE_TYPE_Y] = stride; + frame_to_filter->strides[PLANE_TYPE_UV] = stride >> subsampling_x; + DECLARE_ALIGNED(16, uint16_t, src[1024 * 3]); + frame_to_filter->buffer_alloc = CONVERT_TO_BYTEPTR(src); + frame_to_filter->flags = + YV12_FLAG_HIGHBITDEPTH; // Only Hihgbd bit-depth test. + memcpy(src, src1_, 1024 * 3 * sizeof(uint16_t)); + + std::unique_ptr<MACROBLOCKD> mbd(new (std::nothrow) MACROBLOCKD); + ASSERT_NE(mbd, nullptr); + mbd->bd = BD; + for (int plane = AOM_PLANE_Y; plane < num_planes; plane++) { + int plane_height = plane ? height >> subsampling_y : height; + int plane_stride = plane ? stride >> subsampling_x : stride; + frame_to_filter->buffers[plane] = + frame_to_filter->buffer_alloc + plane * plane_stride * plane_height; + mbd->plane[plane].subsampling_x = plane ? subsampling_x : 0; + mbd->plane[plane].subsampling_y = plane ? 
subsampling_y : 0; + } + + params_.ref_func(frame_to_filter.get(), mbd.get(), block_size, mb_row, + mb_col, num_planes, sigma, subblock_mvs, subblock_mses, + q_factor, filter_strength, tf_wgt_calc_lvl_, + CONVERT_TO_BYTEPTR(src2_), accumulator_ref, count_ref); + params_.tst_func(frame_to_filter.get(), mbd.get(), block_size, mb_row, + mb_col, num_planes, sigma, subblock_mvs, subblock_mses, + q_factor, filter_strength, tf_wgt_calc_lvl_, + CONVERT_TO_BYTEPTR(src2_), accumulator_mod, count_mod); + + if (run_times > 1) { + aom_usec_timer_start(&ref_timer); + for (int j = 0; j < run_times; j++) { + params_.ref_func(frame_to_filter.get(), mbd.get(), block_size, mb_row, + mb_col, num_planes, sigma, subblock_mvs, subblock_mses, + q_factor, filter_strength, tf_wgt_calc_lvl_, + CONVERT_TO_BYTEPTR(src2_), accumulator_ref, count_ref); + } + aom_usec_timer_mark(&ref_timer); + const int elapsed_time_c = + static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int j = 0; j < run_times; j++) { + params_.tst_func(frame_to_filter.get(), mbd.get(), block_size, mb_row, + mb_col, num_planes, sigma, subblock_mvs, subblock_mses, + q_factor, filter_strength, tf_wgt_calc_lvl_, + CONVERT_TO_BYTEPTR(src2_), accumulator_mod, count_mod); + } + aom_usec_timer_mark(&test_timer); + const int elapsed_time_simd = + static_cast<int>(aom_usec_timer_elapsed(&test_timer)); + + printf( + "c_time=%d \t simd_time=%d \t " + "gain=%f\t width=%d\t height=%d\t color_format=%s\n", + elapsed_time_c, elapsed_time_simd, + (float)((float)elapsed_time_c / (float)elapsed_time_simd), width, + height, color_fmt_str[color_fmt]); + + } else { + for (int i = 0, l = 0; i < height; i++) { + for (int j = 0; j < width; j++, l++) { + EXPECT_EQ(accumulator_ref[l], accumulator_mod[l]) + << "Error:" << k << " SSE Sum Test [" << width << "x" << height + << "] " << color_fmt_str[color_fmt] + << " C accumulator does not match optimized accumulator."; + EXPECT_EQ(count_ref[l], 
count_mod[l]) + << "Error:" << k << " SSE Sum Test [" << width << "x" << height + << "] " << color_fmt_str[color_fmt] + << " C count does not match optimized count."; + } + } + } + } +} + +TEST_P(HBDTemporalFilterTest, OperationCheck) { + RunTest(1, 1, 10, I400); + RunTest(1, 1, 10, I420); + RunTest(1, 1, 10, I422); + RunTest(1, 1, 10, I444); +} + +TEST_P(HBDTemporalFilterTest, ExtremeValues) { + RunTest(0, 1, 10, I400); + RunTest(0, 1, 10, I420); + RunTest(0, 1, 10, I422); + RunTest(0, 1, 10, I444); +} + +TEST_P(HBDTemporalFilterTest, DISABLED_Speed) { + RunTest(1, 100000, 10, I400); + RunTest(1, 100000, 10, I420); + RunTest(1, 100000, 10, I422); + RunTest(1, 100000, 10, I444); +} +#if HAVE_SSE2 +HBDTemporalFilterFuncParam HBDtemporal_filter_test_sse2[] = { + HBDTemporalFilterFuncParam(&av1_highbd_apply_temporal_filter_c, + &av1_highbd_apply_temporal_filter_sse2) +}; +INSTANTIATE_TEST_SUITE_P(SSE2, HBDTemporalFilterTest, + Combine(ValuesIn(HBDtemporal_filter_test_sse2), + Values(0, 1))); +#endif // HAVE_SSE2 +#if HAVE_AVX2 +HBDTemporalFilterFuncParam HBDtemporal_filter_test_avx2[] = { + HBDTemporalFilterFuncParam(&av1_highbd_apply_temporal_filter_c, + &av1_highbd_apply_temporal_filter_avx2) +}; +INSTANTIATE_TEST_SUITE_P(AVX2, HBDTemporalFilterTest, + Combine(ValuesIn(HBDtemporal_filter_test_avx2), + Values(0, 1))); +#endif // HAVE_AVX2 + +#if HAVE_NEON +HBDTemporalFilterFuncParam HBDtemporal_filter_test_neon[] = { + HBDTemporalFilterFuncParam(&av1_highbd_apply_temporal_filter_c, + &av1_highbd_apply_temporal_filter_neon) +}; +INSTANTIATE_TEST_SUITE_P(NEON, HBDTemporalFilterTest, + Combine(ValuesIn(HBDtemporal_filter_test_neon), + Values(0, 1))); +#endif // HAVE_NEON + +using HBDEstimateNoiseFunc = double (*)(const uint16_t *src, int height, + int width, int stride, int bit_depth, + int edge_thresh); + +using HBDEstimateNoiseWithParam = + std::tuple<HBDEstimateNoiseFunc, HBDEstimateNoiseFunc, int, int, int>; + +class HBDEstimateNoiseTest + : public 
::testing::TestWithParam<HBDEstimateNoiseWithParam> { + public: + HBDEstimateNoiseTest() + : ref_func_(GET_PARAM(0)), tst_func_(GET_PARAM(1)), + rnd_(libaom_test::ACMRandom::DeterministicSeed()), width_(GET_PARAM(2)), + height_(GET_PARAM(3)), bitdepth_(GET_PARAM(4)) {} + ~HBDEstimateNoiseTest() override = default; + void SetUp() override { + src1_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * width_ * height_)); + ASSERT_NE(src1_, nullptr); + GenRandomData(width_ * height_); + } + + void TearDown() override { aom_free(src1_); } + + void RunTest() { + stride_ = width_; + + double ref_out = ref_func_(src1_, height_, width_, stride_, bitdepth_, + NOISE_ESTIMATION_EDGE_THRESHOLD); + + double tst_out = tst_func_(src1_, height_, width_, stride_, bitdepth_, + NOISE_ESTIMATION_EDGE_THRESHOLD); + + EXPECT_EQ(ref_out, tst_out); + } + + void SpeedTest(int run_times) { + stride_ = width_; + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; i++) { + ref_func_(src1_, height_, width_, stride_, bitdepth_, + NOISE_ESTIMATION_EDGE_THRESHOLD); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; i++) { + tst_func_(src1_, height_, width_, stride_, bitdepth_, + NOISE_ESTIMATION_EDGE_THRESHOLD); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf("%d %dx%d :%7.2f/%7.2f (%3.2f)\n", bitdepth_, width_, height_, time1, + time2, time1 / time2); + } + + void GenRandomData(int size) { + for (int ii = 0; ii < size; ii++) src1_[ii] = rnd_.Rand12(); + } + + private: + HBDEstimateNoiseFunc ref_func_; + HBDEstimateNoiseFunc tst_func_; + ACMRandom rnd_; + uint16_t *src1_; + int width_; + int height_; + int stride_; + int bitdepth_; +}; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HBDEstimateNoiseTest); + +TEST_P(HBDEstimateNoiseTest, RandomValues) 
{ RunTest(); } + +TEST_P(HBDEstimateNoiseTest, DISABLED_Speed) { SpeedTest(2000); } + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, HBDEstimateNoiseTest, + ::testing::Combine( + ::testing::Values(av1_highbd_estimate_noise_from_single_plane_c), + ::testing::Values(av1_highbd_estimate_noise_from_single_plane_neon), + ::testing::ValuesIn(kWidths), ::testing::ValuesIn(kHeights), + ::testing::ValuesIn({ 8, 10, 12 }))); +#endif // HAVE_NEON +#endif // CONFIG_AV1_HIGHBITDEPTH +} // namespace +#endif diff --git a/third_party/aom/test/test-data.sha1 b/third_party/aom/test/test-data.sha1 new file mode 100644 index 0000000000..4b4a96d444 --- /dev/null +++ b/third_party/aom/test/test-data.sha1 @@ -0,0 +1,575 @@ +a0edab4ab4054127474074d967a33616ccdccc76 *hantro_collage_w176h144.yuv +d5dfb0151c9051f8c85999255645d7a23916d3c0 *hantro_collage_w352h288.yuv +b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv +26b7f64399b84db4b4c9c915d743ec5c2619d4b9 *invalid-bug-1814.ivf +d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-bug-1814.ivf.res +09aa07e5325b3bb5462182eb30b8ecc914630740 *invalid-chromium-906381.ivf +09d2af8dd22201dd8d48e5dcfcaed281ff9422c7 *invalid-chromium-906381.ivf.res +f7c83c14aa35b928ba8b70f3eaa3b92070be4519 *invalid-google-142530197-1.ivf +d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-google-142530197-1.ivf.res +703c05720d5d67053bcee44987635cd78af2f971 *invalid-google-142530197.ivf +d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-google-142530197.ivf.res +fa06784f23751d8c37be94160fb821e855199af4 *invalid-oss-fuzz-10061.ivf +b055f06b9a95aaa5697fa26497b592a47843a7c8 *invalid-oss-fuzz-10061.ivf.res +c9e06c4c7fb7d69fd635a1f606a5e478d60e99cf *invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf +88e18e61bd2b7457b4c71ebefbdff0029c41cc04 *invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf.res +91a5bedeb4832c1c2900736cc0f644bb63971bbc *invalid-oss-fuzz-10227.ivf +b055f06b9a95aaa5697fa26497b592a47843a7c8 *invalid-oss-fuzz-10227.ivf.res 
+b2d0a29a65879436bf483d04865faca7d11cc2ee *invalid-oss-fuzz-10389.ivf +f4ce175af1d871ed1603c8936f6b78e968f93c85 *invalid-oss-fuzz-10389.ivf.res.4 +11df8e9a068669c678097d460b63609d3da73828 *invalid-oss-fuzz-10555.ivf +b055f06b9a95aaa5697fa26497b592a47843a7c8 *invalid-oss-fuzz-10555.ivf.res +cf5945085fe85456a1f74bf4cc7998b88b3f4b62 *invalid-oss-fuzz-10705.ivf +758671858368ffd2a2c0727898de5661f7cf7d68 *invalid-oss-fuzz-10705.ivf.res +88e29851122cca3f336824f7fa4d9f757f91110c *invalid-oss-fuzz-10723.ivf +64f8a208dec7f1580fbe0371aa15e62bb1262715 *invalid-oss-fuzz-10723.ivf.res.2 +1af486cd2cc83ebeddc76ca7a1c512cc0ec568d5 *invalid-oss-fuzz-10723.ivf.res.3 +0784acc8931090ec24eba752d6c27e359e68fe7d *invalid-oss-fuzz-10779.ivf +5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-oss-fuzz-10779.ivf.res +7d37be9357f89a100ced694aee1ca5a6fad35ba9 *invalid-oss-fuzz-11477.ivf +15932651aacfc4622f0910f728f3f95e08e1753d *invalid-oss-fuzz-11477.ivf.res +1674787c38ddf82a2e5c804203f04f56a304e8e0 *invalid-oss-fuzz-11479.ivf +64f8a208dec7f1580fbe0371aa15e62bb1262715 *invalid-oss-fuzz-11479.ivf.res.2 +b1a45514f0c59be03c9991cd04882426b9b930fa *invalid-oss-fuzz-11523.ivf +3198c7af55a7d50173ce3c369c0cf2d9cdfface6 *invalid-oss-fuzz-11523.ivf.res.2 +cb445173be760c3554f1740ce4d119f57a7be043 *invalid-oss-fuzz-15363.ivf +d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-oss-fuzz-15363.ivf.res +5b697360bf0f02de31bae9b8da78e93570958fa4 *invalid-oss-fuzz-16437.ivf +d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-oss-fuzz-16437.ivf.res.2 +e821070cea8eb687be102a1a118e0341c2e9df69 *invalid-oss-fuzz-24706.ivf +d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-oss-fuzz-24706.ivf.res +c0c32af28c5c6672d14e76d197894723e8a07b07 *invalid-oss-fuzz-33030.ivf +fb38337e7d6203618fcfce4bc2dc17d5a4f00638 *invalid-oss-fuzz-33030.ivf.res +ccbe4081557eb44820a0e6337c4a094421826b9a *invalid-oss-fuzz-9288.ivf +67c54283fe1a26ccf02cc991e4f9a1eea3ac5e78 *invalid-oss-fuzz-9288.ivf.res +c0960f032484579f967881cc025b71cfd7a79ee1 
*invalid-oss-fuzz-9463.ivf +5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-oss-fuzz-9463.ivf.res.2 +f448caf378e250b7eea4fa2d1c3cd7ef4a3211ce *invalid-oss-fuzz-9482.ivf +b055f06b9a95aaa5697fa26497b592a47843a7c8 *invalid-oss-fuzz-9482.ivf.res +a686989de79af89136f631fd630df639c7861851 *invalid-oss-fuzz-9720.ivf +d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-oss-fuzz-9720.ivf.res +a432f96ff0a787268e2f94a8092ab161a18d1b06 *park_joy_90p_10_420.y4m +0b194cc312c3a2e84d156a221b0a5eb615dfddc5 *park_joy_90p_10_422.y4m +ff0e0a21dc2adc95b8c1b37902713700655ced17 *park_joy_90p_10_444.y4m +c934da6fb8cc54ee2a8c17c54cf6076dac37ead0 *park_joy_90p_10_440.yuv +614c32ae1eca391e867c70d19974f0d62664dd99 *park_joy_90p_12_420.y4m +c92825f1ea25c5c37855083a69faac6ac4641a9e *park_joy_90p_12_422.y4m +b592189b885b6cc85db55cc98512a197d73d3b34 *park_joy_90p_12_444.y4m +82c1bfcca368c2f22bad7d693d690d5499ecdd11 *park_joy_90p_12_440.yuv +b9e1e90aece2be6e2c90d89e6ab2372d5f8c792d *park_joy_90p_8_420_a10-1.y4m +4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c *park_joy_90p_8_420.y4m +7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947 *park_joy_90p_8_422.y4m +bdb7856e6bc93599bdda05c2e773a9f22b6c6d03 *park_joy_90p_8_444.y4m +81e1f3843748438b8f2e71db484eb22daf72e939 *park_joy_90p_8_440.yuv +b1f1c3ec79114b9a0651af24ce634afb44a9a419 *rush_hour_444.y4m +eb438c6540eb429f74404eedfa3228d409c57874 *desktop_640_360_30.yuv +89e70ebd22c27d275fe14dc2f1a41841a6d8b9ab *kirland_640_480_30.yuv +33c533192759e5bb4f07abfbac389dc259db4686 *macmarcomoving_640_480_30.yuv +8bfaab121080821b8f03b23467911e59ec59b8fe *macmarcostationary_640_480_30.yuv +9ec21aa2c4a8a9d46d5403ea20c93b0ff5ad74a1 *rand_noise_w1280h720.yuv +70894878d916a599842d9ad0dcd24e10c13e5467 *niklas_640_480_30.yuv +8784b6df2d8cc946195a90ac00540500d2e522e4 *tacomanarrows_640_480_30.yuv +edd86a1f5e62fd9da9a9d46078247759c2638009 *tacomasmallcameramovement_640_480_30.yuv +9a70e8b7d14fba9234d0e51dce876635413ce444 *thaloundeskmtg_640_480_30.yuv 
+e7d315dbf4f3928779e0dc624311196d44491d32 *niklas_1280_720_30.yuv +717da707afcaa1f692ff1946f291054eb75a4f06 *screendata.y4m +9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m +5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m +36ddab9b99eb7545aa0bf362d6f498212d596516 *vase10x10.yuv +c542890ac929749000f7b3883174f2202070d834 *pixel_capture_w320h240.yuv +c2e1ec9936b95254187a359e94aa32a9f3dad1b7 *av1-1-b8-00-quantizer-00.ivf +26cd2a0321d01d9db5f6dace8b43a40cd5b9d58d *av1-1-b8-00-quantizer-00.ivf.md5 +a56dd02c0258d4afea1ee358a22b54e99e39d5e1 *av1-1-b8-00-quantizer-01.ivf +b3d24124d81f1fbb26f5eb0036accb54f3ec69b2 *av1-1-b8-00-quantizer-01.ivf.md5 +3466327cb842a91d69839b11ef930a74f086f4c6 *av1-1-b8-00-quantizer-02.ivf +c111dce946100efeaad34203080eee1d55464df6 *av1-1-b8-00-quantizer-02.ivf.md5 +d3f1f32de5e2c0c19a58bb8ef096108388c6a820 *av1-1-b8-00-quantizer-03.ivf +6265321b31130545b4454982ca93e412a56845b8 *av1-1-b8-00-quantizer-03.ivf.md5 +f37c393ebe73266a5ec8508a2ca33c586ff28e64 *av1-1-b8-00-quantizer-04.ivf +c6e979da71aecc593c0abb40135dd304152b00dd *av1-1-b8-00-quantizer-04.ivf.md5 +ac9c5e93cb19942a9be259d0567ec96c54dcdc7c *av1-1-b8-00-quantizer-05.ivf +49e35a7399568a0e4f015ce323d5a45ea780ca87 *av1-1-b8-00-quantizer-05.ivf.md5 +461142b1b50ae74c6b698d23f5ed3b764eadfb89 *av1-1-b8-00-quantizer-06.ivf +6477ff260624e0f76c94ac872d1e7d5576af4177 *av1-1-b8-00-quantizer-06.ivf.md5 +7f8113cd13d8faaa06fdbaaa50dc328daf037e6d *av1-1-b8-00-quantizer-07.ivf +b26795c6cb408487c20737977cd6b77311772bf7 *av1-1-b8-00-quantizer-07.ivf.md5 +4218f7945a172e1fe4f9e77ec35085a394eda9f4 *av1-1-b8-00-quantizer-08.ivf +ea5d7d501e9a69d805251e4871515d28468d8676 *av1-1-b8-00-quantizer-08.ivf.md5 +837f3bcadfe56cf302db2ebaf9a990446fb35801 *av1-1-b8-00-quantizer-09.ivf +eede995cdac5fd01a411da2e74e86e8394138be1 *av1-1-b8-00-quantizer-09.ivf.md5 +adc229b3780a4968c18ded1bcbe72e3f04643833 *av1-1-b8-00-quantizer-10.ivf +0799b7e54e54ee97bf0e8aad2b75509ce59c7097 
*av1-1-b8-00-quantizer-10.ivf.md5 +44bac8247160a8d9a0ab19f890fc89cc9298de1d *av1-1-b8-00-quantizer-11.ivf +cc6b2bf167e114599b242aba574e8c6f1fa2f047 *av1-1-b8-00-quantizer-11.ivf.md5 +ebb3af7dfc15567188bcb617021cdc95ebc560e3 *av1-1-b8-00-quantizer-12.ivf +b716ae29d56cd0c052dbfa1b5dcf850cd0fa8ca7 *av1-1-b8-00-quantizer-12.ivf.md5 +46159641f981a26fb9c374a5ca41e44f0ce0a9f0 *av1-1-b8-00-quantizer-13.ivf +c6db1b8b4a74f83e4a0647e053cea0fc00f6abab *av1-1-b8-00-quantizer-13.ivf.md5 +fadc909d18eb640760fbb075f922fb050e715470 *av1-1-b8-00-quantizer-14.ivf +e36bb6b23273633ba3ef7d28160a7258840a1476 *av1-1-b8-00-quantizer-14.ivf.md5 +8befbd9cc1601dcd36ec6911613855f68e6fd40e *av1-1-b8-00-quantizer-15.ivf +cfc2334b76fb5e7aa9d8607e89d37cbc7716d62e *av1-1-b8-00-quantizer-15.ivf.md5 +ca42e00ae27c6b7f684fe3d2a787d50d2827cb3f *av1-1-b8-00-quantizer-16.ivf +f11278218a7c3c73cfaab2332bab55f06cedcc81 *av1-1-b8-00-quantizer-16.ivf.md5 +05270d365bdc067f9446eda3029a6f41571a5229 *av1-1-b8-00-quantizer-17.ivf +fb6482f35e7ad04bf231ea1806226760abcb3c26 *av1-1-b8-00-quantizer-17.ivf.md5 +617bc72037165efbff478d5a0d342b3c20ffcafd *av1-1-b8-00-quantizer-18.ivf +1ff68d5424f91322123fe0d58f436b8e49cfa99d *av1-1-b8-00-quantizer-18.ivf.md5 +821c3b1ae6054c7a91b2f64428806e57f1157ca6 *av1-1-b8-00-quantizer-19.ivf +f2fd118e786697553d6987f786660a2bb9f00680 *av1-1-b8-00-quantizer-19.ivf.md5 +48bcf17c27d9a4eb73632a68c09f42eff9f9af99 *av1-1-b8-00-quantizer-20.ivf +64d55e4c858414bc2837c9c3e2d5fb6d2208c4b8 *av1-1-b8-00-quantizer-20.ivf.md5 +d61ecdd4f0950bc5c8bae1270b22e711bdd22763 *av1-1-b8-00-quantizer-21.ivf +9d447938596096704fd5f4d41bcdf6fabf9cdfb9 *av1-1-b8-00-quantizer-21.ivf.md5 +59b4b65d8e56ccdd1bddff26a03e991a63409334 *av1-1-b8-00-quantizer-22.ivf +aa1be0c7c7622d612af85f9bf96a212f6fe5ab56 *av1-1-b8-00-quantizer-22.ivf.md5 +95ed96988eb9916cad956db9b929718769de49f1 *av1-1-b8-00-quantizer-23.ivf +596b8a3aea468996d609624367465c412751f52b *av1-1-b8-00-quantizer-23.ivf.md5 +e6c2dc4ce725003152797b3d7b34d7eb34da50c8 
*av1-1-b8-00-quantizer-24.ivf +1cd3d7e8b3813a9e5591b94eaeb72d471780e64a *av1-1-b8-00-quantizer-24.ivf.md5 +6734e353008824e523939d1a18daa3f2ab2d8ec6 *av1-1-b8-00-quantizer-25.ivf +c45cf440a05802c1f9e29472175ed397d130d988 *av1-1-b8-00-quantizer-25.ivf.md5 +3372b1c69fb39811156adcea4f6dba802c0918c2 *av1-1-b8-00-quantizer-26.ivf +b1751d55bb3fb788751fe28fb7434bee153bda68 *av1-1-b8-00-quantizer-26.ivf.md5 +e7ddb19a6e2a798d6a4e7dfdfc10b4df777b60e3 *av1-1-b8-00-quantizer-27.ivf +0e19d6b79cd71de69d03e0455349568af979b170 *av1-1-b8-00-quantizer-27.ivf.md5 +7f1c90a35543d6b673e353b3702baf3aa1caeaa7 *av1-1-b8-00-quantizer-28.ivf +d9a4f9cb88103249a05a7e6aa616bf0c16bf9c95 *av1-1-b8-00-quantizer-28.ivf.md5 +28d741b923011c7fcc50a7318256a638d3110a07 *av1-1-b8-00-quantizer-29.ivf +c68cacf2b2ff2694945a99ad836dcf1ee3961c09 *av1-1-b8-00-quantizer-29.ivf.md5 +9a5d9ea4bc76dd40d04e92f33f45e9c2e120e85d *av1-1-b8-00-quantizer-30.ivf +eb02bb8c16c4c0368ddff83e05e516e84ec9eaf3 *av1-1-b8-00-quantizer-30.ivf.md5 +20193c372f44f522e094c2c05fc7e4aaa0717fa8 *av1-1-b8-00-quantizer-31.ivf +a4c1a4ac332f4911f0d5abbd826ebecfb8432d6c *av1-1-b8-00-quantizer-31.ivf.md5 +9617bbd691f093d259dbc8a642a57a153c1fc00c *av1-1-b8-00-quantizer-32.ivf +73d60a348454b126ea6368ea604954bc23f210ae *av1-1-b8-00-quantizer-32.ivf.md5 +d9aea9d72a686c59b60584d827f60ca1ee8eee26 *av1-1-b8-00-quantizer-33.ivf +fbf64de376a63d2d3051da83b0e4e56579b55c0a *av1-1-b8-00-quantizer-33.ivf.md5 +791aaf067f125e5cf4a247cf06a2e29ab071ec90 *av1-1-b8-00-quantizer-34.ivf +8e2e6efe4c069e54844da19125c4280b95990c69 *av1-1-b8-00-quantizer-34.ivf.md5 +01ba67bba5cbf7c94c65da8f4c9bd6e7db24cf3a *av1-1-b8-00-quantizer-35.ivf +0c5e60704a4a6bd27e67b6fd72ca7d2cf7fff50f *av1-1-b8-00-quantizer-35.ivf.md5 +3e255b4a320c9522dcec539fef770b6920b9a102 *av1-1-b8-00-quantizer-36.ivf +1241aab865fd7b4bae73736cbeec1866ea9c90ec *av1-1-b8-00-quantizer-36.ivf.md5 +44fa6fca109747d8f43f6c6aa46d782e5d476d54 *av1-1-b8-00-quantizer-37.ivf +947f0f887c5ac9149cf85e8114a709d6f410fc32 
*av1-1-b8-00-quantizer-37.ivf.md5 +8319ac1ddd6ce3279da5780175dff7a3a5fa1054 *av1-1-b8-00-quantizer-38.ivf +5f571b7f88678eab9e54f162cc9898f14e437770 *av1-1-b8-00-quantizer-38.ivf.md5 +5975e7056e17608593a8c40619b68e6576d373d9 *av1-1-b8-00-quantizer-39.ivf +7c870192d6eb70ce5367147a3d2c6a52e11f7bec *av1-1-b8-00-quantizer-39.ivf.md5 +47da942f1e455f1422fc65f06dd57304541d16ac *av1-1-b8-00-quantizer-40.ivf +6ea7116c9ce3a1641c7060bab2f5e06fd0910d61 *av1-1-b8-00-quantizer-40.ivf.md5 +ab35c15dfde21c2572b14e04dbfd5fac1adae449 *av1-1-b8-00-quantizer-41.ivf +19596f9849653b913186b9d6b7072984ede96177 *av1-1-b8-00-quantizer-41.ivf.md5 +23a5fa6c3d0eaffaf13f6402465f5dd33d8ea7f1 *av1-1-b8-00-quantizer-42.ivf +5a2726f0d1b1799d4f70883f1bfe5c9d976c6cf5 *av1-1-b8-00-quantizer-42.ivf.md5 +86cddfc463d2b186ec5a1aa25c4562c05201e3c3 *av1-1-b8-00-quantizer-43.ivf +674c64ec8487ee774ad09350380fa6ac43815807 *av1-1-b8-00-quantizer-43.ivf.md5 +6894c154eb56c4f3fe44d54fc4f9af468b03d175 *av1-1-b8-00-quantizer-44.ivf +eca679a2781eb894d18b3d578e3aaf4f48019a15 *av1-1-b8-00-quantizer-44.ivf.md5 +0960bf018ada4224b8344519cf091850d50a57bd *av1-1-b8-00-quantizer-45.ivf +291bb43b9e1ab167040b51019daf1ccf94fd1e50 *av1-1-b8-00-quantizer-45.ivf.md5 +ea644a4732f1a2534332802c2fa5073344f3c356 *av1-1-b8-00-quantizer-46.ivf +4c7915382b1d6d08709c95525b04ab8830f20ca1 *av1-1-b8-00-quantizer-46.ivf.md5 +d1f8832d33234e2c74a2280090850153ea24ea82 *av1-1-b8-00-quantizer-47.ivf +90eb9959e612602934dcc512fe6f54abf0c88d9c *av1-1-b8-00-quantizer-47.ivf.md5 +69c93f760e8b666eb5b98f510e09d90f9230ac9b *av1-1-b8-00-quantizer-48.ivf +931f869e14bd455de9dac2101b383c29e7d6f04c *av1-1-b8-00-quantizer-48.ivf.md5 +8b660c577d95c031d6711c1134b8d115097f8d7e *av1-1-b8-00-quantizer-49.ivf +0e3fe8b49d497050dc1a0eac5f3ad60f5fe068fe *av1-1-b8-00-quantizer-49.ivf.md5 +d40bb21448a6da0fc9b88cbcf76d2f4226573acb *av1-1-b8-00-quantizer-50.ivf +bcd2a9c9a021ba44fc5dc74ae02194fe49ca76a4 *av1-1-b8-00-quantizer-50.ivf.md5 +3b5a1d464aa89b0f1a6ad4f5a03602292b826172 
*av1-1-b8-00-quantizer-51.ivf +49bcde0c56cf8b7fbe429336981be22d39025b74 *av1-1-b8-00-quantizer-51.ivf.md5 +38970a02fb38ddb4954fe4240164cb75de5fc744 *av1-1-b8-00-quantizer-52.ivf +fd02b034d79d4be150efb02bd4349edfd0e41311 *av1-1-b8-00-quantizer-52.ivf.md5 +2fde7a7cf3014d5196d011c47de4a144227ed122 *av1-1-b8-00-quantizer-53.ivf +0cb66e6d8fbb29962a69ae1703e22da50db2c92b *av1-1-b8-00-quantizer-53.ivf.md5 +89a69e9b9a601e40cb491ac3a1d32491f2468ac8 *av1-1-b8-00-quantizer-54.ivf +2f8af51acc73c99b5af81db2bdd1883b611ad311 *av1-1-b8-00-quantizer-54.ivf.md5 +31ee4f56fcb0043e95fff7af49e4ef82aafa5543 *av1-1-b8-00-quantizer-55.ivf +04a7104e02bdd0fa38c118202dbbecdbd11ace02 *av1-1-b8-00-quantizer-55.ivf.md5 +f262f0b234006a2652fceb77b1a8711aa53abb54 *av1-1-b8-00-quantizer-56.ivf +bdd54dc25bc5a147c76163af0bced45c56435d79 *av1-1-b8-00-quantizer-56.ivf.md5 +1ef00617091db4b2b839de623bd6b4fb0b2f5f83 *av1-1-b8-00-quantizer-57.ivf +714c65363a87ed5e6e4ad75c79ddb6af57d41fd9 *av1-1-b8-00-quantizer-57.ivf.md5 +43c9b02feccbb3c709d96015f126b7e3d4c24c64 *av1-1-b8-00-quantizer-58.ivf +bae22b8d6377862bff8219470c0d87205d186a68 *av1-1-b8-00-quantizer-58.ivf.md5 +ca5f780abe4c02e48cceb9c804f3625723c359bf *av1-1-b8-00-quantizer-59.ivf +c60a20bbf60b0b0a442ef3f7b682979053909d6e *av1-1-b8-00-quantizer-59.ivf.md5 +1f6f047e9f0e1da22fb514370d92c3c7c66dcf89 *av1-1-b8-00-quantizer-60.ivf +86dc7fa59d363cf1ae4b027a57b119bda893c1c1 *av1-1-b8-00-quantizer-60.ivf.md5 +bcf0c3353568c47a043f2dc34c9abd3fc04eebd4 *av1-1-b8-00-quantizer-61.ivf +66fc4f729c5915aa19939d1b6e28e5b398e747bb *av1-1-b8-00-quantizer-61.ivf.md5 +ac8d3c54451b52cf557ef435d33e7638088d66df *av1-1-b8-00-quantizer-62.ivf +b57f4e1276ead626a3662339a86111ae6fda49d2 *av1-1-b8-00-quantizer-62.ivf.md5 +2a8aa33513d8e01ae9410c4bf5fe1e471b775482 *av1-1-b8-00-quantizer-63.ivf +9f646ec35a168f495e144c64ba7ce9aeb41cd0a2 *av1-1-b8-00-quantizer-63.ivf.md5 +838388fbda4a1d91be81ff62694c3bf13c460d38 *av1-1-b8-01-size-16x16.ivf +4229c1caf8e25eb3073456fb90ceed206753901e 
*av1-1-b8-01-size-16x16.ivf.md5 +23f4253bf71e02b2e8ead66da4b3de875e879ef2 *av1-1-b8-01-size-18x16.ivf +af125644436d4b6897dade68336cedad663b6610 *av1-1-b8-01-size-18x16.ivf.md5 +94e4a75bd93052f79998e9e08e6b5dd73dc27e50 *av1-1-b8-01-size-32x16.ivf +e7b3fbc5e4b2469838e7ae36512bd3ce0a81040c *av1-1-b8-01-size-32x16.ivf.md5 +f297bde01c05ec5c07ff8118a0280bd36c52b246 *av1-1-b8-01-size-34x16.ivf +f6bbd94d6063c689de3c7cf94afa2c68b969d12c *av1-1-b8-01-size-34x16.ivf.md5 +1e18bdf68bab7e7282aacc77e423bc7d93d04a8e *av1-1-b8-01-size-64x16.ivf +de75732fccfb385294b23c17f0f1a57b455edcf7 *av1-1-b8-01-size-64x16.ivf.md5 +26b1f6ae80b161e971468085778cc1ece502b330 *av1-1-b8-01-size-66x16.ivf +48bd99813557c314d398e6952da78da07c79d416 *av1-1-b8-01-size-66x16.ivf.md5 +ff213ecf31b982a3a7f009c9739f64e066e1ffe9 *av1-1-b8-01-size-16x18.ivf +86b20a13b1939dc5f678e80491f190d376233d58 *av1-1-b8-01-size-16x18.ivf.md5 +c90bd878c59263a15c6a6f515d1c7e071f141559 *av1-1-b8-01-size-18x18.ivf +6f659036ffcd3dd380cf970cf1a06f7755e0b2de *av1-1-b8-01-size-18x18.ivf.md5 +e16a1411381b34817a4c0d8e5eeaeb8cddcc9c46 *av1-1-b8-01-size-32x18.ivf +fdb1c4ec56f5aa690eadbe897340fee86a06ae2f *av1-1-b8-01-size-32x18.ivf.md5 +fac7052b39bd2d0ae107e0e94050226712c770c2 *av1-1-b8-01-size-34x18.ivf +adb0d5a99228027eaa3b016963df447c9818c447 *av1-1-b8-01-size-34x18.ivf.md5 +b8be5e55d9be42746c2b547d0e26e80b21c9802a *av1-1-b8-01-size-64x18.ivf +8f8f6da34cdf78c5a6551c637e1afe279cc3884e *av1-1-b8-01-size-64x18.ivf.md5 +9e066bdcc2cd789cdf551bd4c9c85c178887b880 *av1-1-b8-01-size-66x18.ivf +e8ec6effa936423ae2eec2b60a3160720d2de912 *av1-1-b8-01-size-66x18.ivf.md5 +6ebe45085cdeebc2acd6da5abd542a59312c0ff4 *av1-1-b8-01-size-16x32.ivf +044695669103dbf158591dce9c649317a177d5f6 *av1-1-b8-01-size-16x32.ivf.md5 +9fabb4f60641b8c7995d1dc451419165d41258ff *av1-1-b8-01-size-18x32.ivf +7263764680dfec864c3fad5df824ab1973489a14 *av1-1-b8-01-size-18x32.ivf.md5 +3f72841a24a13e601d79cf029aa1fdb02970ce0b *av1-1-b8-01-size-32x32.ivf 
+bbe1ae2888d291ec6bc98cd0784937580c554103 *av1-1-b8-01-size-32x32.ivf.md5 +392131a7c7609acd0dba88fee14f1ed042d23ab1 *av1-1-b8-01-size-34x32.ivf +eea68165ebe9acd28693374bf2266374b9c77786 *av1-1-b8-01-size-34x32.ivf.md5 +78afdd96265811ab9466e906347b57161e5c010d *av1-1-b8-01-size-64x32.ivf +47b317af582700b67f6e77659db1dfaa26c8cde6 *av1-1-b8-01-size-64x32.ivf.md5 +2b4d01f2c9f23044c0d886482c7073bd4d5d37d1 *av1-1-b8-01-size-66x32.ivf +3ad5a58a0ee5086af370b22ab2b5b7592a4f33e7 *av1-1-b8-01-size-66x32.ivf.md5 +78ddae04eb8277ae605bd7017ad7ad27bfc82d39 *av1-1-b8-01-size-16x34.ivf +d0c18e679f1fc51e4f7409831321eed9c4858f6f *av1-1-b8-01-size-16x34.ivf.md5 +38d8ed885f46aead6ec1271d8a5d4aee79b8eb68 *av1-1-b8-01-size-18x34.ivf +097ddbd69b8f54826a35efeb0b8b07ec198bba6b *av1-1-b8-01-size-18x34.ivf.md5 +91a42720bc2e7ba701f4d97b463a098b6707cdbd *av1-1-b8-01-size-32x34.ivf +c590d43d37095bd2e8f8d12c9278477419b72d1a *av1-1-b8-01-size-32x34.ivf.md5 +4cc2a437dba56e8878113d9b390b980522542028 *av1-1-b8-01-size-34x34.ivf +57eeb971f00e64abde25be69dbcb4e3ce5065a57 *av1-1-b8-01-size-34x34.ivf.md5 +b36fee1b6ad69d1206466615d69c05e0a4407939 *av1-1-b8-01-size-64x34.ivf +a78aea0250d0b32657dc0eaf2d8394bc766c0e35 *av1-1-b8-01-size-64x34.ivf.md5 +10e441209262e082e31fef8c15b51579c9e81509 *av1-1-b8-01-size-66x34.ivf +558b46f6ef1662c208012d0b66d1857eeff3244e *av1-1-b8-01-size-66x34.ivf.md5 +dd44aad500c7ca0fc97e3d8f0abed3c83b24c79c *av1-1-b8-01-size-16x64.ivf +a5b64e8063abcf3e4872dc4baf1c32384dc5cf83 *av1-1-b8-01-size-16x64.ivf.md5 +aa849f0d09bcb2ead44719d63043536932d5c9f2 *av1-1-b8-01-size-18x64.ivf +bcdf2dea3590c7031158ffe7b907d9ee35e2fe57 *av1-1-b8-01-size-18x64.ivf.md5 +36e856d30e160ba2fbb00510296202f61afaae49 *av1-1-b8-01-size-32x64.ivf +99299f75b82c40c13f168adf2d124f57044a39a2 *av1-1-b8-01-size-32x64.ivf.md5 +e3e03ec5d38eb25e97e4ec3adc6ed40ecdebd278 *av1-1-b8-01-size-34x64.ivf +84625abf8a200a7d20dd3dd3b277b50b3d62ce32 *av1-1-b8-01-size-34x64.ivf.md5 +7d017daebef2d39ed42a505a8e6103ab0c0988c1 
*av1-1-b8-01-size-64x64.ivf +1ff38d5ecba82fb2e6ac3b09c29c9fe74885ac29 *av1-1-b8-01-size-64x64.ivf.md5 +e1b58ba0b462508593399a2ed84db5f1c59ffcd2 *av1-1-b8-01-size-66x64.ivf +a6b2c84c94fe79ab0373d157d1203f8d66de0706 *av1-1-b8-01-size-66x64.ivf.md5 +7b4faa7eb7b73392b62de6613282a98dddc13bb6 *av1-1-b8-01-size-16x66.ivf +a2dacf2bae3c4ab352af66a9600946d29ab9a6ee *av1-1-b8-01-size-16x66.ivf.md5 +0f97805fa30497d4cf39665150f00dfdea52d862 *av1-1-b8-01-size-18x66.ivf +33d8ea0765953250f998da3fe161f2a8cfca2353 *av1-1-b8-01-size-18x66.ivf.md5 +c8bb00256de973e3b3ee31b924f554336d310cdb *av1-1-b8-01-size-32x66.ivf +6a6588e6edc68ff7739968a9e7cc6d9eaaeed356 *av1-1-b8-01-size-32x66.ivf.md5 +75ec54fec5c36eecde6d0a16e0389a5f7ad8ec22 *av1-1-b8-01-size-34x66.ivf +36101dfa9495c18696c0d7d61f25e748f4de7425 *av1-1-b8-01-size-34x66.ivf.md5 +7e5491716e70f8199156b8843513c935667b281e *av1-1-b8-01-size-64x66.ivf +da38755bb0c9ef56b81617835ddf1340242c6dce *av1-1-b8-01-size-64x66.ivf.md5 +68b47b386f61d67cb5b824a7e6bf87c8b9c2bf7b *av1-1-b8-01-size-66x66.ivf +25974893956ebd92df474325946130c34f880ea7 *av1-1-b8-01-size-66x66.ivf.md5 +9f386d19c87dbfd6ac84a06d2393dd88863ac003 *av1-1-b8-01-size-196x196.ivf +788f77f655f55de3db94dd69870316134c149116 *av1-1-b8-01-size-196x196.ivf.md5 +ed3bb2bb52a9d1786e233ef38142b15b85097875 *av1-1-b8-01-size-198x196.ivf +3bb6b6721ad9b2838b2d07e47b29d6c0117526b1 *av1-1-b8-01-size-198x196.ivf.md5 +49461772caaaa7b824d48f4e9c77a906b0dc02d5 *av1-1-b8-01-size-200x196.ivf +f1cba00c36909c56097c8785df476d42bc91f259 *av1-1-b8-01-size-200x196.ivf.md5 +44a656a22958e26ed169a69deb8f373117224f06 *av1-1-b8-01-size-202x196.ivf +69be876b52fe42811bba52d36d0bcc88d6c25b3f *av1-1-b8-01-size-202x196.ivf.md5 +0a6fe9b478363faedbfd465a75790b4c2661b9ba *av1-1-b8-01-size-208x196.ivf +fc8e95a6860a8a37ccdf1dfe49828502fcf96a08 *av1-1-b8-01-size-208x196.ivf.md5 +8e05b5a20ec95afd92bb615a7daa2e17a7ef55a8 *av1-1-b8-01-size-210x196.ivf +0add512bffbda3300d8f684a53b13b996fe2e46d *av1-1-b8-01-size-210x196.ivf.md5 
+a15f12652c6b4d0c30f13a439c941bfc4a431d1a *av1-1-b8-01-size-224x196.ivf +b904b93252175f79e0e2b28896131ce93d5fc925 *av1-1-b8-01-size-224x196.ivf.md5 +1a57b913443b267f4a31a6925c39f5b58022f550 *av1-1-b8-01-size-226x196.ivf +7cf3087de5804763a82d2a798243a66459664772 *av1-1-b8-01-size-226x196.ivf.md5 +2cc28541a2a72e8b45a368f71e70fc294e2de3ab *av1-1-b8-01-size-196x198.ivf +bb736eedb4bd1e39bf9d60435b4b27a12842e112 *av1-1-b8-01-size-196x198.ivf.md5 +c4ebf93fbf3ae52108fd7b39ddef3afae48188ea *av1-1-b8-01-size-198x198.ivf +fa4de6881511728bafa15b5f441a0cfdf683cc75 *av1-1-b8-01-size-198x198.ivf.md5 +55fce983186d454b0eb15527393bb2465ba41c6b *av1-1-b8-01-size-200x198.ivf +1ac8fb1ee622cbc4aa1b83cb46b4731c85efae62 *av1-1-b8-01-size-200x198.ivf.md5 +67d276c67886f0a91a7ee06751a64f95eeb7bc1f *av1-1-b8-01-size-202x198.ivf +1633b62d9e4ea41737c42f70cbde9a5671da0cef *av1-1-b8-01-size-202x198.ivf.md5 +081cb3f29d3956d4d858d9661fd3d62c94b68867 *av1-1-b8-01-size-208x198.ivf +871d1c99167408dd32fa7603a7296c9b99ccda15 *av1-1-b8-01-size-208x198.ivf.md5 +b2d80b42468d5f296ae240cfb1fc0b3dd3d96bbc *av1-1-b8-01-size-210x198.ivf +6a3382656cb17b532a97b1061697f9a878fc58d1 *av1-1-b8-01-size-210x198.ivf.md5 +84d7994fa20fcf6c1d8dbd4c2060c988a6fce831 *av1-1-b8-01-size-224x198.ivf +42ea12e15de81f2e8617b6de7bae76de2da4d648 *av1-1-b8-01-size-224x198.ivf.md5 +c74a9281cf98c597121df6bff0ac5312b887f969 *av1-1-b8-01-size-226x198.ivf +4133aae0001804e2bbc7928fc065517a6dd8b288 *av1-1-b8-01-size-226x198.ivf.md5 +27adbf148c63f807bd617cfd78aeaedb8b0f2304 *av1-1-b8-01-size-196x200.ivf +9253e525e6207ef1ce0839b8f88ea781e9abe41e *av1-1-b8-01-size-196x200.ivf.md5 +21c9ea4d882e48353d3df66fcde0e4746168163f *av1-1-b8-01-size-198x200.ivf +3d5ee59fde9194f0eaff736051cfd1d7b7daeff1 *av1-1-b8-01-size-198x200.ivf.md5 +c27b0b57667910847122a0309c703315e444110f *av1-1-b8-01-size-200x200.ivf +7b2a15a17b421ef07e285ca4e8a224f0512c434d *av1-1-b8-01-size-200x200.ivf.md5 +780de549e4163a52590f7c0f488e027a8a4aa053 *av1-1-b8-01-size-202x200.ivf 
+cb0ec0969522ca60d79a639e9b9509363468ffd0 *av1-1-b8-01-size-202x200.ivf.md5 +2c59821904863e264ae61401cbd494a79bc04f13 *av1-1-b8-01-size-208x200.ivf +9963955966a52b65cdd13465c9fb2ba3b5356755 *av1-1-b8-01-size-208x200.ivf.md5 +ff63121611ea9c0628c7e5af13de5e7786611ca6 *av1-1-b8-01-size-210x200.ivf +2a5993be234e3af2af6d185b2a6f3aaf1979b83a *av1-1-b8-01-size-210x200.ivf.md5 +b8485ada95440d78b51153227231b1aced1a8273 *av1-1-b8-01-size-224x200.ivf +9c3cd32ea6c006a91eb37d69dbeccf878de5d214 *av1-1-b8-01-size-224x200.ivf.md5 +1aa0ce3e3a74f9b600a146e98b05547a0b454c48 *av1-1-b8-01-size-226x200.ivf +e045be96c3af16a9ddc10a9933e8ddfb3319d716 *av1-1-b8-01-size-226x200.ivf.md5 +e92b76480f4339855d998b97182f36b28deadcfa *av1-1-b8-01-size-196x202.ivf +480c707abcd2a650e2160ec397f8348cecb45770 *av1-1-b8-01-size-196x202.ivf.md5 +137b9c0d10a3bdbdf6f97b3e6331f3e8acaf8f91 *av1-1-b8-01-size-198x202.ivf +7429642146d0da55161ab13024a261094ee2ce87 *av1-1-b8-01-size-198x202.ivf.md5 +9cea71c44ad015ac702d675bacca17876e65cb1a *av1-1-b8-01-size-200x202.ivf +76b1ec6c42da55f47e389a561590d1a7c713e495 *av1-1-b8-01-size-200x202.ivf.md5 +26dffdcd0dac9becf68d12e31fcd91eddf1f7154 *av1-1-b8-01-size-202x202.ivf +ddb75e99123fed4ef05d9b85200cefd8985bc84c *av1-1-b8-01-size-202x202.ivf.md5 +04007e83bb66ba547d09f8926ea5bfc7fd9e4b2a *av1-1-b8-01-size-208x202.ivf +5b72eb58db22087ad416c499119f41e718395b52 *av1-1-b8-01-size-208x202.ivf.md5 +721ff7c0ae0e2ed896b5acac230113f1404e769c *av1-1-b8-01-size-210x202.ivf +187d2ef939fc26e1a1c7de65abe8e058d8aae17a *av1-1-b8-01-size-210x202.ivf.md5 +dba41421cc938bcf0234254f96be0325ab66186e *av1-1-b8-01-size-224x202.ivf +58856038c1eb13a7bf0353a30b1affe844cd31b1 *av1-1-b8-01-size-224x202.ivf.md5 +55eba14878d25dcc351ee5e92fa06e559035b409 *av1-1-b8-01-size-226x202.ivf +e295b3d791d40d7c1fff2c40a260078dccaef24a *av1-1-b8-01-size-226x202.ivf.md5 +6c777223990ddfd92040a8526646ed0f39299b0d *av1-1-b8-01-size-196x208.ivf +5210daff766cddaf3945610ee05ff242aef8175a *av1-1-b8-01-size-196x208.ivf.md5 
+252831abfb9f4a9a8556c21cc3bf60adfe88210f *av1-1-b8-01-size-198x208.ivf +35ed9601e608a829980cec81e41b7bd3e5f4c2ce *av1-1-b8-01-size-198x208.ivf.md5 +e800ed893a88704a4576d4984957f3664560daa9 *av1-1-b8-01-size-200x208.ivf +82c038f9072a2fcf8d55fb4a474fdd791ba9a290 *av1-1-b8-01-size-200x208.ivf.md5 +9ce7bb932dd99f86da8ff2ab89fa4d3089a78da8 *av1-1-b8-01-size-202x208.ivf +0611bf0179abe3c820a447a2bd3a04c3790f3a87 *av1-1-b8-01-size-202x208.ivf.md5 +e5900d9150c8bebc49776227afd3b0a21f5a6ac6 *av1-1-b8-01-size-208x208.ivf +86d6b9a3840aa0a77938547c905bd6f45d069681 *av1-1-b8-01-size-208x208.ivf.md5 +2758ba5dad16f4a91334f2ed07a4a037201bb873 *av1-1-b8-01-size-210x208.ivf +78453b1fda2ccc6f35e0d762567807757bcddb16 *av1-1-b8-01-size-210x208.ivf.md5 +fff88fb8e833f6b4ad64cb591b219c7cceb7f2d2 *av1-1-b8-01-size-224x208.ivf +87266fc34aaed82cdb98cbc309b221ad52eccd81 *av1-1-b8-01-size-224x208.ivf.md5 +dec839fe64046461015b56cda191835284f42a52 *av1-1-b8-01-size-226x208.ivf +d7a15264fc3fd55d3aec0ccfaa7c434c6d90969f *av1-1-b8-01-size-226x208.ivf.md5 +584782e93ed1cb7797a90fece44becdd1e23bf0d *av1-1-b8-01-size-196x210.ivf +ed76ec841b18a457853e368576967c4768fc2730 *av1-1-b8-01-size-196x210.ivf.md5 +dab625599b9f01398b593e865d9a4a95a029d60f *av1-1-b8-01-size-198x210.ivf +b90e8d96a1f5b329b088b467a11fed2d055d74ca *av1-1-b8-01-size-198x210.ivf.md5 +6774bee17b9e50d2d8630e2e1afc30ded67e662d *av1-1-b8-01-size-200x210.ivf +343a86bd54eb3dd5e9902eb62a3d776dcff2f4f3 *av1-1-b8-01-size-200x210.ivf.md5 +0456c3b8e242eeee019ca97d155f81124de62c90 *av1-1-b8-01-size-202x210.ivf +5a6a6428c9858a0d3561db42ceaf981c143fe479 *av1-1-b8-01-size-202x210.ivf.md5 +6a3a8f65bf806b1be7726b983427880f772c9986 *av1-1-b8-01-size-208x210.ivf +5563ea6d8c65887553ff3000addc6418913f1650 *av1-1-b8-01-size-208x210.ivf.md5 +5a8b69489f8e9b917ea7718ad2645101cdbe5644 *av1-1-b8-01-size-210x210.ivf +f4b01604036fa23000d44fbf42097ae1181bcd62 *av1-1-b8-01-size-210x210.ivf.md5 +fb6f5b08a048698cfe324557ee8cd840c4a3f6ce *av1-1-b8-01-size-224x210.ivf 
+3ce5c404e3ca09c8e994b3043bad42cd555b00c0 *av1-1-b8-01-size-224x210.ivf.md5 +2e9fc8510d2131b2f3c9a93bececac985e4426d2 *av1-1-b8-01-size-226x210.ivf +897c537e259331ca86cdd6e4d2bd343f8538402e *av1-1-b8-01-size-226x210.ivf.md5 +8300512106fce3424eb74b5d4bc0f4f19f7c9af8 *av1-1-b8-01-size-196x224.ivf +43662ea025ea79afe4964fd4d12a77f4aa4e565e *av1-1-b8-01-size-196x224.ivf.md5 +640f8fda7ade8f2850e2275a9f5e233e33a0ba8d *av1-1-b8-01-size-198x224.ivf +9ac690bdbbce47d7b169128b568f955e70076f8c *av1-1-b8-01-size-198x224.ivf.md5 +ce2e9379c72fc924e364d5727605394a1438a211 *av1-1-b8-01-size-200x224.ivf +1ec35a53d88072b96b255202f678178bc7e5bb20 *av1-1-b8-01-size-200x224.ivf.md5 +5d3af7921623deccb578115c8ce207c019f97f50 *av1-1-b8-01-size-202x224.ivf +14eafd55b0cda3a3476cae7ad500dbd5ee899dd5 *av1-1-b8-01-size-202x224.ivf.md5 +6b6d78e466cf94a5ef8dfe252caa0948dd2ec175 *av1-1-b8-01-size-208x224.ivf +e178b0c272dfcfe614c6b49cb28dad11781af0b6 *av1-1-b8-01-size-208x224.ivf.md5 +dd2232b9e18971d7e19650a1e3218aef1010247f *av1-1-b8-01-size-210x224.ivf +40a66198c47820f5fa2d2e389ec0c1191ea4ffcc *av1-1-b8-01-size-210x224.ivf.md5 +9ec028b81a5ea311683328d856f436e6d0b0e6a0 *av1-1-b8-01-size-224x224.ivf +143b9530ce722385db2c2d883daa649ed42b8d40 *av1-1-b8-01-size-224x224.ivf.md5 +bf833947e62935c54e1e727ccb36157f7c1e9e5d *av1-1-b8-01-size-226x224.ivf +ca4f3b44463106e4f0bb54e490c3bd457d7d780b *av1-1-b8-01-size-226x224.ivf.md5 +5525f7e312ec073f480ed5a2be5bdc4f0ce51a09 *av1-1-b8-01-size-196x226.ivf +062d4b240741184458d2d2abd243ed7877631de8 *av1-1-b8-01-size-196x226.ivf.md5 +e6b911142394b94c23191eaa63c9eb41a00f80b0 *av1-1-b8-01-size-198x226.ivf +3b580d903dddf47082f5e055bfb01a4f05c09b7d *av1-1-b8-01-size-198x226.ivf.md5 +70feb5efeb28df25f7d1a661c73bf013c5ada9b4 *av1-1-b8-01-size-200x226.ivf +f0b894e7f787e62f1492be62f3dedeb065062160 *av1-1-b8-01-size-200x226.ivf.md5 +7f9a10831e2389b31497fad50080b4d5452d6e91 *av1-1-b8-01-size-202x226.ivf +45b7194eba9367c8059403c23ca4ae49e988dfaf *av1-1-b8-01-size-202x226.ivf.md5 
+967837a2cfbf9aa3131f73aec6a52dcdd82926c7 *av1-1-b8-01-size-208x226.ivf +c8baedb48fd5d4c956aa8d73fd957370f718f047 *av1-1-b8-01-size-208x226.ivf.md5 +9c926226b9f6b015501d8ac1e3f95e8570283a05 *av1-1-b8-01-size-210x226.ivf +57d4837667fd4c5a7aeb908626d701b632852c60 *av1-1-b8-01-size-210x226.ivf.md5 +25a4940922761239809d82c45c2be1c5e4f48785 *av1-1-b8-01-size-224x226.ivf +87ae7e7558241bf3575a333f56fbad4dfdade8ff *av1-1-b8-01-size-224x226.ivf.md5 +40dd208eb525cd90d7c0674cf787097fb909afae *av1-1-b8-01-size-226x226.ivf +34bdef682a4eae0e0a05e4486a968af1df8b220a *av1-1-b8-01-size-226x226.ivf.md5 +9bbe8499796aa588ff02e313fb0d4349940d2fea *av1-1-b10-00-quantizer-00.ivf +36b402eedad2bacee8ac09acce44e2fc356dd80b *av1-1-b10-00-quantizer-00.ivf.md5 +1d5e1d2827624f328020bf123df213bb175577e0 *av1-1-b10-00-quantizer-01.ivf +16c529be5502369e43ce9c6fe99a9709968e3daf *av1-1-b10-00-quantizer-01.ivf.md5 +39abc20739242a8f05efd4b35d7603c8ad7ff45d *av1-1-b10-00-quantizer-02.ivf +81faa72c3d43b003966fe09ffaae51b07b1059be *av1-1-b10-00-quantizer-02.ivf.md5 +92ebf349b803333a43824a83d997b8cf76f656f9 *av1-1-b10-00-quantizer-03.ivf +5e7556dc998cb8b506a43cc078e30802d7e600e6 *av1-1-b10-00-quantizer-03.ivf.md5 +1c496177c66e49f2e3556af87ec67afb5060170b *av1-1-b10-00-quantizer-04.ivf +560fea4800a44fe19ed8d3e74f425bdbf1fb8abd *av1-1-b10-00-quantizer-04.ivf.md5 +7de864b8475ce0acd0ecb01827f2c9add815352b *av1-1-b10-00-quantizer-05.ivf +1c1aea3db3f54a91866d89fd3b1a0d285ca10310 *av1-1-b10-00-quantizer-05.ivf.md5 +b6501c165619b036d0f7864fd4739973d2d18970 *av1-1-b10-00-quantizer-06.ivf +d758c8eff275651006c41e7dd447cac13b489ad7 *av1-1-b10-00-quantizer-06.ivf.md5 +e4df6f588f156dffaafd9517b64f753cfc9ccf05 *av1-1-b10-00-quantizer-07.ivf +3c577f67dade4537de642fd457ea2b367424f336 *av1-1-b10-00-quantizer-07.ivf.md5 +07e9c4c18abb36c8699c1c12bebcc727f090b525 *av1-1-b10-00-quantizer-08.ivf +4981568ade3170f311cb114fa2689edc4bc35e67 *av1-1-b10-00-quantizer-08.ivf.md5 +2268ecd2899f1b41ae9898925b1d62cfefa30282 
*av1-1-b10-00-quantizer-09.ivf +029b03029b65b7c4c208961f0820467ad42fd3d6 *av1-1-b10-00-quantizer-09.ivf.md5 +3d2adaf6441cfa9585dcbf7d19d65bf6992a29a3 *av1-1-b10-00-quantizer-10.ivf +017b7fb4c3ba0747c2d5688d493da33ef993d110 *av1-1-b10-00-quantizer-10.ivf.md5 +006535760bd7dc1cfc95e648b05215954a2e76c2 *av1-1-b10-00-quantizer-11.ivf +c0ae083deb8e820aa49034af4d100944dd977018 *av1-1-b10-00-quantizer-11.ivf.md5 +840e0cbfe1acc8a7a45c823dc55ab44a0b6b553e *av1-1-b10-00-quantizer-12.ivf +49232ea38bdef650c94808f53834f1137cd4bf39 *av1-1-b10-00-quantizer-12.ivf.md5 +04b0e5a7387e07474f51be4b2c3e05211b40f0d0 *av1-1-b10-00-quantizer-13.ivf +a51b5ec4b890df3a64f9f0d866b8c41296c9e081 *av1-1-b10-00-quantizer-13.ivf.md5 +5dc47a140fbcbf08bf91481ee3585e9e067561ab *av1-1-b10-00-quantizer-14.ivf +2625319eef69d6225e6ab6e5ce7790491406cb5d *av1-1-b10-00-quantizer-14.ivf.md5 +f866be86d8d8aa08ded30e42988b0936c1a16064 *av1-1-b10-00-quantizer-15.ivf +03b7c1eefb54d99e30051c7123c0453f04a6579d *av1-1-b10-00-quantizer-15.ivf.md5 +548df2371dfb485419ed9baf28e3f495c64f364a *av1-1-b10-00-quantizer-16.ivf +8a0d6bf1626b05b65c77331305414fe9be54e8c6 *av1-1-b10-00-quantizer-16.ivf.md5 +0077c82f96a2e095a3cb8de9bfa63715e3c9f438 *av1-1-b10-00-quantizer-17.ivf +5d85f77f3087f4b206930722a945c60039262be4 *av1-1-b10-00-quantizer-17.ivf.md5 +1e0f1245ecb4c903b5dc7072d959fc43a7bba381 *av1-1-b10-00-quantizer-18.ivf +06316ae2b45f2359a70cc3855ffd6ab81048b41a *av1-1-b10-00-quantizer-18.ivf.md5 +f197198f7ec058110185fda5297a1a43993654df *av1-1-b10-00-quantizer-19.ivf +bac522c7f234d506c75b5495d74b3fa57c83a4df *av1-1-b10-00-quantizer-19.ivf.md5 +c2f57324d000b349323f37d5ebebde8c2b861f30 *av1-1-b10-00-quantizer-20.ivf +999c6110786cbc25e67792234a5a02f2cb4553c7 *av1-1-b10-00-quantizer-20.ivf.md5 +2ffad9adfd19286fe2166ba877289d201c9a634f *av1-1-b10-00-quantizer-21.ivf +d55713eaa791cfd7bf69b6c26d5032029d9a0f06 *av1-1-b10-00-quantizer-21.ivf.md5 +382528db53328c1a38976f5d9b579eef35d839f4 *av1-1-b10-00-quantizer-22.ivf 
+cb5bd459e1a90126da9264cff4281515f95755b2 *av1-1-b10-00-quantizer-22.ivf.md5 +b52cc6160fc66f72ad66c198d275a1c73f925022 *av1-1-b10-00-quantizer-23.ivf +c0f9d6659e1f283e9356fd7b4ac9f7cc5544cdc2 *av1-1-b10-00-quantizer-23.ivf.md5 +e11f15e3b63e7606b1122bb3670ee77c09c04840 *av1-1-b10-00-quantizer-24.ivf +e9f141b924440e044270c81a68458fe498599a8e *av1-1-b10-00-quantizer-24.ivf.md5 +fb91793b69824c99b0218788dcea0a74ebd7e84e *av1-1-b10-00-quantizer-25.ivf +434e33d609b2683c3cfbcc3a2cdfc26339590fb6 *av1-1-b10-00-quantizer-25.ivf.md5 +d82e38f31cdcf8b43479e6ddaa83373de38f70a2 *av1-1-b10-00-quantizer-26.ivf +183943b851ba383a536f13c83b93f61ac8961ad5 *av1-1-b10-00-quantizer-26.ivf.md5 +6bf5e4e8e0aca699e493b9eb3672d2117494d74d *av1-1-b10-00-quantizer-27.ivf +f0fb7e0a99180828b0e38b2cfe0622eecc2d26b8 *av1-1-b10-00-quantizer-27.ivf.md5 +d5adee2567544c3ae4223b3f3528a770377878d2 *av1-1-b10-00-quantizer-28.ivf +14edf588efc67570e529b0ff8aeb8e7a0c69238b *av1-1-b10-00-quantizer-28.ivf.md5 +e6dcdc106847956035e3f00aabf4470f97e1887e *av1-1-b10-00-quantizer-29.ivf +413c5cb778611c7c1a810b53861b9ab1fb391f17 *av1-1-b10-00-quantizer-29.ivf.md5 +b5e98b3f6b1db04d46bf43064c6ac64f797aff00 *av1-1-b10-00-quantizer-30.ivf +d1a603661d76c28658c7cd2892b408e91d77893e *av1-1-b10-00-quantizer-30.ivf.md5 +80168371d1150e82e3f46bcbbcabba458b835b19 *av1-1-b10-00-quantizer-31.ivf +904ecd033d4af5239c4d5b3f86e51ed5c3c2e3fb *av1-1-b10-00-quantizer-31.ivf.md5 +96291f6ace85980892d135a5b74188cd629c325f *av1-1-b10-00-quantizer-32.ivf +a5ceace390d4a75d48281fe29060c21557e4f5ae *av1-1-b10-00-quantizer-32.ivf.md5 +0f80495de34eae07c4905b72573a315a879390ec *av1-1-b10-00-quantizer-33.ivf +72b8f662973a660412946687dff878b276ae518e *av1-1-b10-00-quantizer-33.ivf.md5 +24905e3be7db320994b7fb8311dfd50a7c9e54da *av1-1-b10-00-quantizer-34.ivf +cea514bb1b7b064c4d31914a2cb266611c278577 *av1-1-b10-00-quantizer-34.ivf.md5 +083012960dd7c17d3b00fa0e807759c98faded8f *av1-1-b10-00-quantizer-35.ivf +de5fdb9e1e581484af1cc7d2dd3c3e84c90cebb2 
*av1-1-b10-00-quantizer-35.ivf.md5 +f725f179aeee5b413620c0dd81b007b245c2a7ed *av1-1-b10-00-quantizer-36.ivf +246b1931c04c02df1f168090e2650827cd5dbabd *av1-1-b10-00-quantizer-36.ivf.md5 +f6aa824156e9848f237481889a8103eb6130f31d *av1-1-b10-00-quantizer-37.ivf +a8f78dd15fc2994369a08c2ddddcd0760c62ea5b *av1-1-b10-00-quantizer-37.ivf.md5 +a8dd662338c493aea266b99203e70af25982633f *av1-1-b10-00-quantizer-38.ivf +09f36d998e85d0450060f540e50b075ae1432fc6 *av1-1-b10-00-quantizer-38.ivf.md5 +d97428871720ed658da6ed0e3f7c15da83387e4c *av1-1-b10-00-quantizer-39.ivf +8c5230048909ee8f86f87c116f153cd910d0141f *av1-1-b10-00-quantizer-39.ivf.md5 +86e754e55e9b63c6e0a4fef01761414f8a6b61ca *av1-1-b10-00-quantizer-40.ivf +99a71accf6457264e45ca80d3b1f082ee5acdecc *av1-1-b10-00-quantizer-40.ivf.md5 +9d18b7236506ab7e107c062620b64096ec0cf423 *av1-1-b10-00-quantizer-41.ivf +5771159a9a7c7b66c9e13bb13ec3d53b37860208 *av1-1-b10-00-quantizer-41.ivf.md5 +54b72bc879a80e66613f421e67db62bba1c0041b *av1-1-b10-00-quantizer-42.ivf +bf958236883ee7209ef4cb0b7503b430634a291e *av1-1-b10-00-quantizer-42.ivf.md5 +a06d5321a51d90404dd7085ae511d7df5d5e1e05 *av1-1-b10-00-quantizer-43.ivf +ddb25723d976043d863634b9dc3b5fb84a245803 *av1-1-b10-00-quantizer-43.ivf.md5 +2ea0b64c170d7299dae1c14a8a49349aee8e0d08 *av1-1-b10-00-quantizer-44.ivf +d18bde1b4893792173fa2014665e9364395ad5e9 *av1-1-b10-00-quantizer-44.ivf.md5 +73e506a32d3518e23424f231c7b5323d7a34a3d6 *av1-1-b10-00-quantizer-45.ivf +be6224ebc77a3e5fb9c1645b876007e584a09d89 *av1-1-b10-00-quantizer-45.ivf.md5 +841223871374464194edc739c48dc7cefd1ff255 *av1-1-b10-00-quantizer-46.ivf +4766d616f923496a8dc113c9b7f875f0c0735f9a *av1-1-b10-00-quantizer-46.ivf.md5 +8bbbbea130aaea453f7b826956a5520d10a0eccf *av1-1-b10-00-quantizer-47.ivf +3ea21fac0c492b03d8ec25e4ee0971cd57e5f71a *av1-1-b10-00-quantizer-47.ivf.md5 +3ce83e0f1e1835b9a6c10fe502a16fd3650839e0 *av1-1-b10-00-quantizer-48.ivf +b468de2c09fca5a6b2bb7a20bab4afd8d192c31d *av1-1-b10-00-quantizer-48.ivf.md5 
+f3a757c678aa00f9a9c4c4658d37733fd935925a *av1-1-b10-00-quantizer-49.ivf +f888dc88db576122695d4eb41c486aacd28a2d1d *av1-1-b10-00-quantizer-49.ivf.md5 +a9d78aaef105cc5a95b7ebb54783f37e75673123 *av1-1-b10-00-quantizer-50.ivf +06d0c5e79cc794030c4be022089b1d12c1383f71 *av1-1-b10-00-quantizer-50.ivf.md5 +165c20ee372f83682d094541097e375227353239 *av1-1-b10-00-quantizer-51.ivf +b3d90214b8c6e6f6d9357bb5784d10081325c356 *av1-1-b10-00-quantizer-51.ivf.md5 +5b3ea7a18654d943065f5c176974c3960b56664e *av1-1-b10-00-quantizer-52.ivf +dc61a6e4e2549074130023b14b137fb4fe442ce3 *av1-1-b10-00-quantizer-52.ivf.md5 +74c3b5851b6a94d33b575a689eb8d34592e95d5f *av1-1-b10-00-quantizer-53.ivf +a80e43a0fb2b852426bd941b8d4b8f56690e9bc9 *av1-1-b10-00-quantizer-53.ivf.md5 +d05b8dea2cddd4f0d9e792f42f71afbd29f7811c *av1-1-b10-00-quantizer-54.ivf +432937893321f4bd25fa400b8988c5788cb06ecf *av1-1-b10-00-quantizer-54.ivf.md5 +4eaee0f1970426be0bbeb7d4fccdc7e804e9bea4 *av1-1-b10-00-quantizer-55.ivf +710ab95ce1dcd2540db4477ff4ee6ab771fe0759 *av1-1-b10-00-quantizer-55.ivf.md5 +fe637930c9faa8744cba37effc4cb5510315d1c0 *av1-1-b10-00-quantizer-56.ivf +2f9431b30523fb6a3e4122f22c6c3ff7b96a7987 *av1-1-b10-00-quantizer-56.ivf.md5 +ed54fc7fcec194eef1f50adbbe12a6a36ab6836b *av1-1-b10-00-quantizer-57.ivf +43bccac7800b399210cf15520a83739c23a5d9c7 *av1-1-b10-00-quantizer-57.ivf.md5 +a7b8d628ba3e4c5f37aa6a3d7b82afda73ac89dc *av1-1-b10-00-quantizer-58.ivf +b26638272b787df54f45a46629b852acbcb73e3d *av1-1-b10-00-quantizer-58.ivf.md5 +c077f22ff547fb5ffd020e8dac91d05942fb52df *av1-1-b10-00-quantizer-59.ivf +4efd99cc0891bf345b8cd2ae8e21709d61be497b *av1-1-b10-00-quantizer-59.ivf.md5 +301ab53039d75e1ffa8cc6a0874d9ea94e4a6a0d *av1-1-b10-00-quantizer-60.ivf +4729bd734a6edd2d8d0432a3f66b3d91d565050e *av1-1-b10-00-quantizer-60.ivf.md5 +c78640d3211034df9fcb273bdfc18625819652f2 *av1-1-b10-00-quantizer-61.ivf +3d823eb2b33ccfea68db506626bcbecf49b0f167 *av1-1-b10-00-quantizer-61.ivf.md5 +bf241a449a28773b93e6e529a06dfc28109577e4 
*av1-1-b10-00-quantizer-62.ivf +75457d8476f1927f737d089dcf3d0f7f99f3c4fb *av1-1-b10-00-quantizer-62.ivf.md5 +8b6eb3fff2e0db7eac775b08c745250ca591e2d9 *av1-1-b10-00-quantizer-63.ivf +63ea689d025593e5d91760785b8e446d04d4671e *av1-1-b10-00-quantizer-63.ivf.md5 +a9f7ea6312a533cc6426a6145edd190d45813c37 *av1-1-b8-02-allintra.ivf +8fd8f789cfee1069d20f3e2c241f5cad7292239e *av1-1-b8-02-allintra.ivf.md5 +e69e41fee40b408b6eebcc79f266a95f2ee24f9e *av1-1-b8-03-sizedown.mkv +8c528fb3ccda959a29721566e132f730935ca32b *av1-1-b8-03-sizedown.mkv.md5 +1889da5ee1708007e47bb887470ac477e1d7ba01 *av1-1-b8-03-sizeup.mkv +8de81b170635d456602dc8923a8b39c534d01fa8 *av1-1-b8-03-sizeup.mkv.md5 +d3ed7de0aa8c155fe35e0f5f4203240710d31383 *park_joy_90p_8_420_monochrome.y4m +5b3f0907407b809aa66b62cb080feda8c92454ca *park_joy_90p_8_420_vertical_csp.y4m +caf8b6a5f1a5bcb38afae8a54a08c4f4459aafa3 *vase10x10_tiles.txt +e14825f50ff845b8a6932c64cb254007a0b5e3a1 *av1-1-b8-22-svc-L2T1.ivf +0f75f2ac44e61fc83be70c955410fa378e433237 *av1-1-b8-22-svc-L2T1.ivf.md5 +e94687eb0e90179b3800b6d5e11eb7e9bfb34eec *av1-1-b8-22-svc-L1T2.ivf +2bc12b16385ea14323bc79607fb8dfbd7edaf8ef *av1-1-b8-22-svc-L1T2.ivf.md5 +32ef2f14ee9cb11a24a22934f4c065e926e5d236 *av1-1-b8-22-svc-L2T2.ivf +f476a10ff06d750129f8229755d51e17ff141b2a *av1-1-b8-22-svc-L2T2.ivf.md5 +afca5502a489692b0a3c120370b0f43b8fc572a1 *av1-1-b8-04-cdfupdate.ivf +13b9423155a08d5e3a2fd9ae4a973bb046718cdf *av1-1-b8-04-cdfupdate.ivf.md5 +f064290d7fcd3b3de19020e8aec6c43c88d3a505 *av1-1-b8-05-mv.ivf +bff316e63ded5559116bdc2fa4aa97ad7b1a1761 *av1-1-b8-05-mv.ivf.md5 +b48a717c7c003b8dd23c3c2caed1ac673380fdb3 *av1-1-b8-06-mfmv.ivf +1424e3cb53e00eb56b94f4c725826274212c42b6 *av1-1-b8-06-mfmv.ivf.md5 +f8724ed96272ddbc35776908f2df7cb9955766a9 *paris_352_288_30.y4m +11bb40026103182c23a88133edafca369e5575e2 *av1-1-b8-23-film_grain-50.ivf +c58ccf7ff04711acc559c06f0bfce3c5b14800c3 *av1-1-b8-23-film_grain-50.ivf.md5 +2f883c7e11c21a31f79bd9c809541be90b0c7c4a 
*av1-1-b10-23-film_grain-50.ivf +83f2094fca597ad38b4fd623b807de1774c53ffb *av1-1-b10-23-film_grain-50.ivf.md5 +644e05c6bc0418a72b86427aa01e8b4ecea85e03 *desktop1.320_180.yuv +ad18ca16f0a249fb3b7c38de0d9b327fed273f96 *hantro_collage_w352h288_nv12.yuv +a17584012187cd886b64f8cb0f35bfd8d762f9dc *av1-1-b8-24-monochrome.ivf +e71cd9a07f928c527c900daddd071ae60337426d *av1-1-b8-24-monochrome.ivf.md5 +03a8d002594ccc51932332002bb6f9837ef46d0f *av1-1-b10-24-monochrome.ivf +e24aa6951afd7b2bb53eb1a73e25a19e7b189f82 *av1-1-b10-24-monochrome.ivf.md5 +df0c9481104aa8c81f9e3b61b6d147a331ad3e35 *firstpass_stats +3eaf216d9fc8b4b9bb8c3956311f49a85974806c *bus_352x288_420_f20_b8.yuv +c7f336958e7af6162c20ddc84d67c7dfa9826910 *av1-1-b8-16-intra_only-intrabc-extreme-dv.ivf +36a4fcf07e645ed522cde5845dd9c6ab2b2d1502 *av1-1-b8-16-intra_only-intrabc-extreme-dv.ivf.md5 +9f935d391fdf4a6f7c320355d45770d2e7d6095c *desktopqvga2.320_240.yuv +4d1ad6d3070268ccb000d7fc3ae0f5a9447bfe82 *test_input_w1h1.yuv +ad9942a073e245585c93f764ea299382a65939a7 *crowd_run_360p_10_150f.y4m +9c2aa2d0f63f706f775bf661dfa81e8bb3089d8b *wikipedia_420_360p_60f.y4m diff --git a/third_party/aom/test/test.cmake b/third_party/aom/test/test.cmake new file mode 100644 index 0000000000..ce94a5a657 --- /dev/null +++ b/third_party/aom/test/test.cmake @@ -0,0 +1,647 @@ +# +# Copyright (c) 2017, Alliance for Open Media. All rights reserved +# +# This source code is subject to the terms of the BSD 2 Clause License and the +# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was +# not distributed with this source code in the LICENSE file, you can obtain it +# at www.aomedia.org/license/software. If the Alliance for Open Media Patent +# License 1.0 was not distributed with this source code in the PATENTS file, you +# can obtain it at www.aomedia.org/license/patent. 
+# +if(AOM_TEST_TEST_CMAKE_) + return() +endif() # AOM_TEST_TEST_CMAKE_ +set(AOM_TEST_TEST_CMAKE_ 1) + +include(ProcessorCount) + +include("${AOM_ROOT}/test/test_data_util.cmake") + +set(AOM_UNIT_TEST_DATA_LIST_FILE "${AOM_ROOT}/test/test-data.sha1") +set(AOM_IDE_TEST_FOLDER "test") +set(AOM_IDE_TESTDATA_FOLDER "testdata") + +# Appends |AOM_TEST_SOURCE_VARS| with |src_list_name| at the caller's scope. +# This collects all variables containing libaom test source files. +function(add_to_libaom_test_srcs src_list_name) + list(APPEND AOM_TEST_SOURCE_VARS ${src_list_name}) + set(AOM_TEST_SOURCE_VARS "${AOM_TEST_SOURCE_VARS}" PARENT_SCOPE) +endfunction() + +list(APPEND AOM_UNIT_TEST_WRAPPER_SOURCES "${AOM_GEN_SRC_DIR}/usage_exit.c" + "${AOM_ROOT}/test/test_libaom.cc") +add_to_libaom_test_srcs(AOM_UNIT_TEST_WRAPPER_SOURCES) + +list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/acm_random.h" + "${AOM_ROOT}/test/aom_image_test.cc" + "${AOM_ROOT}/test/aom_integer_test.cc" + "${AOM_ROOT}/test/av1_config_test.cc" + "${AOM_ROOT}/test/av1_key_value_api_test.cc" + "${AOM_ROOT}/test/block_test.cc" + "${AOM_ROOT}/test/codec_factory.h" + "${AOM_ROOT}/test/function_equivalence_test.h" + "${AOM_ROOT}/test/log2_test.cc" + "${AOM_ROOT}/test/md5_helper.h" + "${AOM_ROOT}/test/register_state_check.h" + "${AOM_ROOT}/test/test_vectors.cc" + "${AOM_ROOT}/test/test_vectors.h" + "${AOM_ROOT}/test/transform_test_base.h" + "${AOM_ROOT}/test/util.h" + "${AOM_ROOT}/test/video_source.h") +add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_SOURCES) + +list(APPEND AOM_UNIT_TEST_DECODER_SOURCES "${AOM_ROOT}/test/decode_api_test.cc" + "${AOM_ROOT}/test/decode_scalability_test.cc" + "${AOM_ROOT}/test/external_frame_buffer_test.cc" + "${AOM_ROOT}/test/invalid_file_test.cc" + "${AOM_ROOT}/test/test_vector_test.cc" + "${AOM_ROOT}/test/ivf_video_source.h") +add_to_libaom_test_srcs(AOM_UNIT_TEST_DECODER_SOURCES) + +list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES + "${AOM_ROOT}/test/active_map_test.cc" + 
"${AOM_ROOT}/test/aq_segment_test.cc" + "${AOM_ROOT}/test/av1_external_partition_test.cc" + "${AOM_ROOT}/test/avif_progressive_test.cc" + "${AOM_ROOT}/test/borders_test.cc" + "${AOM_ROOT}/test/cpu_speed_test.cc" + "${AOM_ROOT}/test/cpu_used_firstpass_test.cc" + "${AOM_ROOT}/test/datarate_test.cc" + "${AOM_ROOT}/test/datarate_test.h" + "${AOM_ROOT}/test/deltaq_mode_test.cc" + "${AOM_ROOT}/test/dropframe_encode_test.cc" + "${AOM_ROOT}/test/svc_datarate_test.cc" + "${AOM_ROOT}/test/encode_api_test.cc" + "${AOM_ROOT}/test/encode_small_width_height_test.cc" + "${AOM_ROOT}/test/encode_test_driver.cc" + "${AOM_ROOT}/test/encode_test_driver.h" + "${AOM_ROOT}/test/end_to_end_psnr_test.cc" + "${AOM_ROOT}/test/forced_max_frame_width_height_test.cc" + "${AOM_ROOT}/test/force_key_frame_test.cc" + "${AOM_ROOT}/test/gf_pyr_height_test.cc" + "${AOM_ROOT}/test/rt_end_to_end_test.cc" + "${AOM_ROOT}/test/allintra_end_to_end_test.cc" + "${AOM_ROOT}/test/loopfilter_control_test.cc" + "${AOM_ROOT}/test/frame_size_tests.cc" + "${AOM_ROOT}/test/horz_superres_test.cc" + "${AOM_ROOT}/test/i420_video_source.h" + "${AOM_ROOT}/test/level_test.cc" + "${AOM_ROOT}/test/metadata_test.cc" + "${AOM_ROOT}/test/monochrome_test.cc" + "${AOM_ROOT}/test/postproc_filters_test.cc" + "${AOM_ROOT}/test/resize_test.cc" + "${AOM_ROOT}/test/scalability_test.cc" + "${AOM_ROOT}/test/sharpness_test.cc" + "${AOM_ROOT}/test/y4m_test.cc" + "${AOM_ROOT}/test/y4m_video_source.h" + "${AOM_ROOT}/test/yuv_video_source.h" + "${AOM_ROOT}/test/time_stamp_test.cc") +add_to_libaom_test_srcs(AOM_UNIT_TEST_ENCODER_SOURCES) + +list(APPEND AOM_ENCODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/encode_perf_test.cc") +list(APPEND AOM_UNIT_TEST_WEBM_SOURCES "${AOM_ROOT}/test/webm_video_source.h") +add_to_libaom_test_srcs(AOM_UNIT_TEST_WEBM_SOURCES) +list(APPEND AOM_TEST_INTRA_PRED_SPEED_SOURCES "${AOM_GEN_SRC_DIR}/usage_exit.c" + "${AOM_ROOT}/test/test_intra_pred_speed.cc") + +if(CONFIG_AV1_DECODER) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES 
+ "${AOM_ROOT}/test/decode_test_driver.cc" + "${AOM_ROOT}/test/decode_test_driver.h") +endif() + +if(CONFIG_INTERNAL_STATS AND CONFIG_AV1_HIGHBITDEPTH) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/hbd_metrics_test.cc") +endif() + +list(APPEND AOM_DECODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/decode_perf_test.cc") + +if(CONFIG_REALTIME_ONLY) + list(REMOVE_ITEM AOM_UNIT_TEST_ENCODER_SOURCES + "${AOM_ROOT}/test/allintra_end_to_end_test.cc" + "${AOM_ROOT}/test/av1_external_partition_test.cc" + "${AOM_ROOT}/test/avif_progressive_test.cc" + "${AOM_ROOT}/test/borders_test.cc" + "${AOM_ROOT}/test/cpu_speed_test.cc" + "${AOM_ROOT}/test/cpu_used_firstpass_test.cc" + "${AOM_ROOT}/test/deltaq_mode_test.cc" + "${AOM_ROOT}/test/dropframe_encode_test.cc" + "${AOM_ROOT}/test/end_to_end_psnr_test.cc" + "${AOM_ROOT}/test/force_key_frame_test.cc" + "${AOM_ROOT}/test/gf_pyr_height_test.cc" + "${AOM_ROOT}/test/horz_superres_test.cc" + "${AOM_ROOT}/test/level_test.cc" + "${AOM_ROOT}/test/metadata_test.cc" + "${AOM_ROOT}/test/monochrome_test.cc" + "${AOM_ROOT}/test/postproc_filters_test.cc" + "${AOM_ROOT}/test/sharpness_test.cc") +endif() + +if(NOT BUILD_SHARED_LIBS) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/aom_mem_test.cc" + "${AOM_ROOT}/test/av1_common_int_test.cc" + "${AOM_ROOT}/test/cdef_test.cc" + "${AOM_ROOT}/test/cfl_test.cc" + "${AOM_ROOT}/test/convolve_test.cc" + "${AOM_ROOT}/test/hiprec_convolve_test.cc" + "${AOM_ROOT}/test/hiprec_convolve_test_util.cc" + "${AOM_ROOT}/test/hiprec_convolve_test_util.h" + "${AOM_ROOT}/test/intrabc_test.cc" + "${AOM_ROOT}/test/intrapred_test.cc" + "${AOM_ROOT}/test/lpf_test.cc" + "${AOM_ROOT}/test/scan_test.cc" + "${AOM_ROOT}/test/selfguided_filter_test.cc" + "${AOM_ROOT}/test/simd_cmp_impl.h" + "${AOM_ROOT}/test/simd_impl.h") + + if(HAVE_SSE2) + list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSE2 + "${AOM_ROOT}/test/simd_cmp_sse2.cc") + add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_INTRIN_SSE2) + endif() + + 
if(HAVE_SSSE3) + list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSSE3 + "${AOM_ROOT}/test/simd_cmp_ssse3.cc") + add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_INTRIN_SSSE3) + endif() + + if(HAVE_SSE4_1) + list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1 + "${AOM_ROOT}/test/simd_cmp_sse4.cc") + add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1) + endif() + + if(HAVE_AVX2) + list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_AVX2 + "${AOM_ROOT}/test/simd_cmp_avx2.cc") + add_to_libaom_test_srcs(AOM_UNIT_TEST_COMMON_INTRIN_AVX2) + endif() + + list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES + "${AOM_ROOT}/test/arf_freq_test.cc" + "${AOM_ROOT}/test/av1_convolve_test.cc" + "${AOM_ROOT}/test/av1_fwd_txfm1d_test.cc" + "${AOM_ROOT}/test/av1_fwd_txfm2d_test.cc" + "${AOM_ROOT}/test/av1_inv_txfm1d_test.cc" + "${AOM_ROOT}/test/av1_inv_txfm2d_test.cc" + "${AOM_ROOT}/test/av1_k_means_test.cc" + "${AOM_ROOT}/test/av1_nn_predict_test.cc" + "${AOM_ROOT}/test/av1_round_shift_array_test.cc" + "${AOM_ROOT}/test/av1_softmax_test.cc" + "${AOM_ROOT}/test/av1_txfm_test.cc" + "${AOM_ROOT}/test/av1_txfm_test.h" + "${AOM_ROOT}/test/av1_wedge_utils_test.cc" + "${AOM_ROOT}/test/avg_test.cc" + "${AOM_ROOT}/test/blend_a64_mask_1d_test.cc" + "${AOM_ROOT}/test/blend_a64_mask_test.cc" + "${AOM_ROOT}/test/comp_avg_pred_test.cc" + "${AOM_ROOT}/test/comp_avg_pred_test.h" + "${AOM_ROOT}/test/comp_mask_pred_test.cc" + "${AOM_ROOT}/test/disflow_test.cc" + "${AOM_ROOT}/test/encodemb_test.cc" + "${AOM_ROOT}/test/encodetxb_test.cc" + "${AOM_ROOT}/test/end_to_end_qmpsnr_test.cc" + "${AOM_ROOT}/test/end_to_end_ssim_test.cc" + "${AOM_ROOT}/test/error_block_test.cc" + "${AOM_ROOT}/test/fdct4x4_test.cc" + "${AOM_ROOT}/test/fft_test.cc" + "${AOM_ROOT}/test/firstpass_test.cc" + "${AOM_ROOT}/test/fwht4x4_test.cc" + "${AOM_ROOT}/test/hadamard_test.cc" + "${AOM_ROOT}/test/horver_correlation_test.cc" + "${AOM_ROOT}/test/masked_sad_test.cc" + "${AOM_ROOT}/test/masked_variance_test.cc" + "${AOM_ROOT}/test/minmax_test.cc" + 
"${AOM_ROOT}/test/motion_vector_test.cc" + "${AOM_ROOT}/test/mv_cost_test.cc" + "${AOM_ROOT}/test/noise_model_test.cc" + "${AOM_ROOT}/test/obmc_sad_test.cc" + "${AOM_ROOT}/test/obmc_variance_test.cc" + "${AOM_ROOT}/test/pickrst_test.cc" + "${AOM_ROOT}/test/reconinter_test.cc" + "${AOM_ROOT}/test/sad_test.cc" + "${AOM_ROOT}/test/subtract_test.cc" + "${AOM_ROOT}/test/sum_squares_test.cc" + "${AOM_ROOT}/test/sse_sum_test.cc" + "${AOM_ROOT}/test/variance_test.cc" + "${AOM_ROOT}/test/warp_filter_test.cc" + "${AOM_ROOT}/test/warp_filter_test_util.cc" + "${AOM_ROOT}/test/warp_filter_test_util.h" + "${AOM_ROOT}/test/webmenc_test.cc" + "${AOM_ROOT}/test/wiener_test.cc") + + if(NOT CONFIG_REALTIME_ONLY) + list(APPEND AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1 + "${AOM_ROOT}/test/corner_match_test.cc") + endif() + + if(CONFIG_ACCOUNTING) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/accounting_test.cc") + endif() + + if(CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/altref_test.cc" + "${AOM_ROOT}/test/av1_encoder_parms_get_to_decoder.cc" + "${AOM_ROOT}/test/av1_ext_tile_test.cc" + "${AOM_ROOT}/test/binary_codes_test.cc" + "${AOM_ROOT}/test/boolcoder_test.cc" + "${AOM_ROOT}/test/cnn_test.cc" + "${AOM_ROOT}/test/decode_multithreaded_test.cc" + "${AOM_ROOT}/test/divu_small_test.cc" + "${AOM_ROOT}/test/dr_prediction_test.cc" + "${AOM_ROOT}/test/ec_test.cc" + "${AOM_ROOT}/test/error_resilience_test.cc" + "${AOM_ROOT}/test/ethread_test.cc" + "${AOM_ROOT}/test/film_grain_table_test.cc" + "${AOM_ROOT}/test/kf_test.cc" + "${AOM_ROOT}/test/lossless_test.cc" + "${AOM_ROOT}/test/quant_test.cc" + "${AOM_ROOT}/test/ratectrl_test.cc" + "${AOM_ROOT}/test/rd_test.cc" + "${AOM_ROOT}/test/sb_multipass_test.cc" + "${AOM_ROOT}/test/sb_qp_sweep_test.cc" + "${AOM_ROOT}/test/screen_content_test.cc" + "${AOM_ROOT}/test/segment_binarization_sync.cc" + "${AOM_ROOT}/test/still_picture_test.cc" + 
"${AOM_ROOT}/test/temporal_filter_test.cc" + "${AOM_ROOT}/test/tile_config_test.cc" + "${AOM_ROOT}/test/tile_independence_test.cc" + "${AOM_ROOT}/test/tpl_model_test.cc") + if(CONFIG_AV1_HIGHBITDEPTH) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/coding_path_sync.cc") + endif() + if(CONFIG_REALTIME_ONLY) + list(REMOVE_ITEM AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/altref_test.cc" + "${AOM_ROOT}/test/av1_encoder_parms_get_to_decoder.cc" + "${AOM_ROOT}/test/av1_ext_tile_test.cc" + "${AOM_ROOT}/test/cnn_test.cc" + "${AOM_ROOT}/test/decode_multithreaded_test.cc" + "${AOM_ROOT}/test/error_resilience_test.cc" + "${AOM_ROOT}/test/kf_test.cc" + "${AOM_ROOT}/test/lossless_test.cc" + "${AOM_ROOT}/test/sb_multipass_test.cc" + "${AOM_ROOT}/test/sb_qp_sweep_test.cc" + "${AOM_ROOT}/test/selfguided_filter_test.cc" + "${AOM_ROOT}/test/screen_content_test.cc" + "${AOM_ROOT}/test/still_picture_test.cc" + "${AOM_ROOT}/test/tile_independence_test.cc" + "${AOM_ROOT}/test/tpl_model_test.cc") + endif() + endif() + + if(CONFIG_FPMT_TEST AND (NOT CONFIG_REALTIME_ONLY)) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/frame_parallel_enc_test.cc") + endif() + + if(HAVE_SSE2) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/simd_sse2_test.cc") + endif() + + if(HAVE_SSSE3) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/simd_ssse3_test.cc") + endif() + + if(HAVE_SSE4_1) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/simd_sse4_test.cc") + endif() + + if(HAVE_SSE4_1 OR HAVE_NEON) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/filterintra_test.cc") + + list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES + "${AOM_ROOT}/test/av1_highbd_iht_test.cc") + endif() + + if(HAVE_AVX2) + list(APPEND AOM_UNIT_TEST_COMMON_SOURCES + "${AOM_ROOT}/test/simd_avx2_test.cc") + endif() + + if(CONFIG_AV1_TEMPORAL_DENOISING AND (HAVE_SSE2 OR HAVE_NEON)) + list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES + 
"${AOM_ROOT}/test/av1_temporal_denoiser_test.cc") + endif() + + if(CONFIG_AV1_HIGHBITDEPTH) + list(APPEND AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1 + "${AOM_ROOT}/test/av1_quantize_test.cc") + endif() + + if(HAVE_SSE2 OR HAVE_NEON) + list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES + "${AOM_ROOT}/test/quantize_func_test.cc") + endif() + + if(HAVE_SSE4_1) + list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES + "${AOM_ROOT}/test/av1_convolve_scale_test.cc" + "${AOM_ROOT}/test/av1_horz_only_frame_superres_test.cc" + "${AOM_ROOT}/test/intra_edge_test.cc") + endif() + + if(HAVE_NEON) + list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES + "${AOM_ROOT}/test/av1_convolve_scale_test.cc" + "${AOM_ROOT}/test/av1_horz_only_frame_superres_test.cc" + "${AOM_ROOT}/test/intra_edge_test.cc") + endif() + + if(HAVE_SSE4_2 OR HAVE_ARM_CRC32) + list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES "${AOM_ROOT}/test/hash_test.cc") + endif() + + if(CONFIG_REALTIME_ONLY) + list(REMOVE_ITEM AOM_UNIT_TEST_ENCODER_SOURCES + "${AOM_ROOT}/test/disflow_test.cc" + "${AOM_ROOT}/test/end_to_end_qmpsnr_test.cc" + "${AOM_ROOT}/test/end_to_end_ssim_test.cc" + "${AOM_ROOT}/test/firstpass_test.cc" + "${AOM_ROOT}/test/motion_vector_test.cc" + "${AOM_ROOT}/test/obmc_sad_test.cc" + "${AOM_ROOT}/test/obmc_variance_test.cc" + "${AOM_ROOT}/test/pickrst_test.cc" + "${AOM_ROOT}/test/warp_filter_test.cc" + "${AOM_ROOT}/test/warp_filter_test_util.cc" + "${AOM_ROOT}/test/warp_filter_test_util.h" + "${AOM_ROOT}/test/wiener_test.cc") + endif() +endif() + +if(CONFIG_AV1_ENCODER AND ENABLE_TESTS) + list(APPEND AOM_RC_TEST_SOURCES "${AOM_ROOT}/test/codec_factory.h" + "${AOM_ROOT}/test/decode_test_driver.cc" + "${AOM_ROOT}/test/decode_test_driver.h" + "${AOM_ROOT}/test/encode_test_driver.cc" + "${AOM_ROOT}/test/encode_test_driver.h" + "${AOM_ROOT}/test/i420_video_source.h" + "${AOM_ROOT}/test/ratectrl_rtc_test.cc" + "${AOM_ROOT}/test/test_aom_rc.cc" "${AOM_ROOT}/test/util.h") + if(CONFIG_THREE_PASS) + # Add the dependencies of "${AOM_ROOT}/common/ivfdec.c". 
+ list(APPEND AOM_RC_TEST_SOURCES "${AOM_ROOT}/common/tools_common.c" + "${AOM_ROOT}/common/tools_common.h" + "${AOM_GEN_SRC_DIR}/usage_exit.c") + endif() +endif() + +if(ENABLE_TESTS) + if(BUILD_SHARED_LIBS AND APPLE) # Silence an RPATH warning. + set(CMAKE_MACOSX_RPATH 1) + endif() + + add_library( + aom_gtest STATIC + "${AOM_ROOT}/third_party/googletest/src/googletest/src/gtest-all.cc") + set_property(TARGET aom_gtest PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER}) + target_include_directories( + aom_gtest + PUBLIC "${AOM_ROOT}/third_party/googletest/src/googletest/include" + PRIVATE "${AOM_ROOT}/third_party/googletest/src/googletest") + + # The definition of GTEST_HAS_PTHREAD must be public, since it's checked by + # interface headers, not just by the implementation. + if(NOT (MSVC OR WIN32)) + if(CONFIG_MULTITHREAD AND CMAKE_USE_PTHREADS_INIT) + target_compile_definitions(aom_gtest PUBLIC GTEST_HAS_PTHREAD=1) + else() + target_compile_definitions(aom_gtest PUBLIC GTEST_HAS_PTHREAD=0) + endif() + endif() + + add_library( + aom_gmock STATIC + "${AOM_ROOT}/third_party/googletest/src/googlemock/src/gmock-all.cc") + set_property(TARGET aom_gmock PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER}) + target_include_directories( + aom_gmock + PUBLIC "${AOM_ROOT}/third_party/googletest/src/googlemock/include" + PRIVATE "${AOM_ROOT}/third_party/googletest/src/googlemock") + target_link_libraries(aom_gmock ${AOM_LIB_LINK_TYPE} aom_gtest) +endif() + +# Setup testdata download targets, test build targets, and test run targets. The +# libaom and app util targets must exist before this function is called. +function(setup_aom_test_targets) + + # TODO(tomfinegan): Build speed optimization. $AOM_UNIT_TEST_COMMON_SOURCES + # and $AOM_UNIT_TEST_ENCODER_SOURCES are very large. The build of test targets + # could be sped up (on multicore build machines) by compiling sources in each + # list into separate object library targets, and then linking them into + # test_libaom. 
+ add_library(test_aom_common OBJECT ${AOM_UNIT_TEST_COMMON_SOURCES}) + set_property(TARGET test_aom_common PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER}) + add_dependencies(test_aom_common aom) + target_link_libraries(test_aom_common ${AOM_LIB_LINK_TYPE} aom_gtest) + + if(CONFIG_AV1_DECODER) + add_library(test_aom_decoder OBJECT ${AOM_UNIT_TEST_DECODER_SOURCES}) + set_property(TARGET test_aom_decoder PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER}) + add_dependencies(test_aom_decoder aom) + target_link_libraries(test_aom_decoder ${AOM_LIB_LINK_TYPE} aom_gtest) + endif() + + if(CONFIG_AV1_ENCODER) + add_library(test_aom_encoder OBJECT ${AOM_UNIT_TEST_ENCODER_SOURCES}) + set_property(TARGET test_aom_encoder PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER}) + add_dependencies(test_aom_encoder aom) + target_link_libraries(test_aom_encoder ${AOM_LIB_LINK_TYPE} aom_gtest) + endif() + + add_executable(test_libaom ${AOM_UNIT_TEST_WRAPPER_SOURCES} + $<TARGET_OBJECTS:aom_common_app_util> + $<TARGET_OBJECTS:test_aom_common>) + set_property(TARGET test_libaom PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER}) + list(APPEND AOM_APP_TARGETS test_libaom) + + if(CONFIG_AV1_DECODER) + target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:aom_decoder_app_util> + $<TARGET_OBJECTS:test_aom_decoder>) + + if(ENABLE_DECODE_PERF_TESTS AND CONFIG_WEBM_IO) + target_sources(test_libaom PRIVATE ${AOM_DECODE_PERF_TEST_SOURCES}) + endif() + endif() + + if(CONFIG_AV1_ENCODER) + target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:test_aom_encoder> + $<TARGET_OBJECTS:aom_encoder_app_util>) + + if(ENABLE_ENCODE_PERF_TESTS) + target_sources(test_libaom PRIVATE ${AOM_ENCODE_PERF_TEST_SOURCES}) + endif() + + if(NOT BUILD_SHARED_LIBS) + add_executable(test_intra_pred_speed + ${AOM_TEST_INTRA_PRED_SPEED_SOURCES} + $<TARGET_OBJECTS:aom_common_app_util>) + set_property(TARGET test_intra_pred_speed + PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER}) + target_link_libraries(test_intra_pred_speed ${AOM_LIB_LINK_TYPE} aom + aom_gtest) + list(APPEND 
AOM_APP_TARGETS test_intra_pred_speed) + endif() + endif() + + target_link_libraries(test_libaom ${AOM_LIB_LINK_TYPE} aom aom_gtest) + + if(CONFIG_WEBM_IO) + target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:webm>) + endif() + if(HAVE_SSE2) + add_intrinsics_source_to_target("-msse2" "test_libaom" + "AOM_UNIT_TEST_COMMON_INTRIN_SSE2") + endif() + if(HAVE_SSSE3) + add_intrinsics_source_to_target("-mssse3" "test_libaom" + "AOM_UNIT_TEST_COMMON_INTRIN_SSSE3") + endif() + if(HAVE_SSE4_1) + add_intrinsics_source_to_target("-msse4.1" "test_libaom" + "AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1") + if(CONFIG_AV1_ENCODER) + if(AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1) + add_intrinsics_source_to_target("-msse4.1" "test_libaom" + "AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1") + endif() + endif() + endif() + if(HAVE_AVX2) + add_intrinsics_source_to_target("-mavx2" "test_libaom" + "AOM_UNIT_TEST_COMMON_INTRIN_AVX2") + endif() + if(HAVE_NEON) + add_intrinsics_source_to_target("${AOM_NEON_INTRIN_FLAG}" "test_libaom" + "AOM_UNIT_TEST_COMMON_INTRIN_NEON") + endif() + + if(ENABLE_TESTDATA) + make_test_data_lists("${AOM_UNIT_TEST_DATA_LIST_FILE}" test_files + test_file_checksums) + list(LENGTH test_files num_test_files) + list(LENGTH test_file_checksums num_test_file_checksums) + + math(EXPR max_file_index "${num_test_files} - 1") + foreach(test_index RANGE ${max_file_index}) + list(GET test_files ${test_index} test_file) + list(GET test_file_checksums ${test_index} test_file_checksum) + add_custom_target( + testdata_${test_index} + COMMAND ${CMAKE_COMMAND} + -DAOM_CONFIG_DIR="${AOM_CONFIG_DIR}" -DAOM_ROOT="${AOM_ROOT}" + -DAOM_TEST_FILE="${test_file}" + -DAOM_TEST_CHECKSUM=${test_file_checksum} -P + "${AOM_ROOT}/test/test_data_download_worker.cmake") + set_property(TARGET testdata_${test_index} + PROPERTY FOLDER ${AOM_IDE_TESTDATA_FOLDER}) + list(APPEND testdata_targets testdata_${test_index}) + endforeach() + + # Create a custom build target for running each test data download target. 
+ add_custom_target(testdata) + add_dependencies(testdata ${testdata_targets}) + set_property(TARGET testdata PROPERTY FOLDER ${AOM_IDE_TESTDATA_FOLDER}) + + # Skip creation of test run targets when generating for Visual Studio and + # Xcode unless the user explicitly requests IDE test hosting. This is done + # to make build cycles in the IDE tolerable when the IDE command for build + # project is used to build AOM. Default behavior in IDEs is to build all + # targets, and the test run takes hours. + if(((NOT MSVC) AND (NOT XCODE)) OR ENABLE_IDE_TEST_HOSTING) + + # Pick a reasonable number of targets (this controls parallelization). + processorcount(num_test_targets) + if(num_test_targets EQUAL 0) # Just default to 10 targets when there's no + # processor count available. + set(num_test_targets 10) + endif() + + math(EXPR max_shard_index "${num_test_targets} - 1") + foreach(shard_index RANGE ${max_shard_index}) + set(test_name "test_${shard_index}") + add_custom_target(${test_name} + COMMAND ${CMAKE_COMMAND} + -DGTEST_SHARD_INDEX=${shard_index} + -DGTEST_TOTAL_SHARDS=${num_test_targets} + -DTEST_LIBAOM=$<TARGET_FILE:test_libaom> -P + "${AOM_ROOT}/test/test_runner.cmake" + DEPENDS testdata test_libaom) + set_property(TARGET ${test_name} PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER}) + list(APPEND test_targets ${test_name}) + endforeach() + add_custom_target(runtests) + set_property(TARGET runtests PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER}) + add_dependencies(runtests ${test_targets}) + endif() + endif() + + # Libaom_test_srcs.txt generation. + set(libaom_test_srcs_txt_file "${AOM_CONFIG_DIR}/libaom_test_srcs.txt") + file(WRITE "${libaom_test_srcs_txt_file}" + "# This file is generated. DO NOT EDIT.\n") + + # Static source file list first. 
+ list(SORT AOM_TEST_SOURCE_VARS) + foreach(aom_test_source_var ${AOM_TEST_SOURCE_VARS}) + if("${aom_test_source_var}" STREQUAL "${last_aom_test_source_var}") + message( + FATAL_ERROR + "Duplicate AOM_TEST_SOURCE_VARS entry: ${aom_test_source_var}") + endif() + foreach(file ${${aom_test_source_var}}) + if(NOT "${file}" MATCHES "${AOM_CONFIG_DIR}") + string(REPLACE "${AOM_ROOT}/" "" file "${file}") + file(APPEND "${libaom_test_srcs_txt_file}" "${file}\n") + endif() + endforeach() + set(last_aom_test_source_var ${aom_test_source_var}) + endforeach() + + # libaom_test_srcs.gni generation + set(libaom_test_srcs_gni_file "${AOM_CONFIG_DIR}/libaom_test_srcs.gni") + file(WRITE "${libaom_test_srcs_gni_file}" + "# This file is generated. DO NOT EDIT.\n") + + foreach(aom_test_source_var ${AOM_TEST_SOURCE_VARS}) + string(TOLOWER "${aom_test_source_var}" aom_test_source_var_lowercase) + file(APPEND "${libaom_test_srcs_gni_file}" + "\n${aom_test_source_var_lowercase} = [\n") + + foreach(file ${${aom_test_source_var}}) + if(NOT "${file}" MATCHES "${AOM_CONFIG_DIR}") + string(REPLACE "${AOM_ROOT}/" "//third_party/libaom/source/libaom/" file + "${file}") + file(APPEND "${libaom_test_srcs_gni_file}" " \"${file}\",\n") + endif() + endforeach() + + file(APPEND "${libaom_test_srcs_gni_file}" "]\n") + endforeach() + + # Set up test for rc interface + if(CONFIG_AV1_ENCODER + AND ENABLE_TESTS + AND CONFIG_WEBM_IO + AND NOT BUILD_SHARED_LIBS + AND NOT CONFIG_REALTIME_ONLY) + add_executable(test_aom_rc ${AOM_RC_TEST_SOURCES}) + target_link_libraries(test_aom_rc ${AOM_LIB_LINK_TYPE} aom aom_av1_rc + aom_gtest aom_gmock webm) + set_property(TARGET test_aom_rc PROPERTY FOLDER ${AOM_IDE_TEST_FOLDER}) + list(APPEND AOM_APP_TARGETS test_aom_rc) + endif() + + set(AOM_APP_TARGETS ${AOM_APP_TARGETS} PARENT_SCOPE) +endfunction() diff --git a/third_party/aom/test/test_aom_rc.cc b/third_party/aom/test/test_aom_rc.cc new file mode 100644 index 0000000000..0182b62ec8 --- /dev/null +++ 
b/third_party/aom/test/test_aom_rc.cc @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/third_party/aom/test/test_data_download_worker.cmake b/third_party/aom/test/test_data_download_worker.cmake new file mode 100644 index 0000000000..a49038888d --- /dev/null +++ b/third_party/aom/test/test_data_download_worker.cmake @@ -0,0 +1,46 @@ +# +# Copyright (c) 2017, Alliance for Open Media. All rights reserved +# +# This source code is subject to the terms of the BSD 2 Clause License and the +# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was +# not distributed with this source code in the LICENSE file, you can obtain it +# at www.aomedia.org/license/software. If the Alliance for Open Media Patent +# License 1.0 was not distributed with this source code in the PATENTS file, you +# can obtain it at www.aomedia.org/license/patent. 
+# +include("${AOM_ROOT}/test/test_data_util.cmake") + +# https://github.com/cheshirekow/cmake_format/issues/34 +# cmake-format: off +if (NOT AOM_ROOT OR NOT AOM_CONFIG_DIR OR NOT AOM_TEST_FILE + OR NOT AOM_TEST_CHECKSUM) + message(FATAL_ERROR + "AOM_ROOT, AOM_CONFIG_DIR, AOM_TEST_FILE and AOM_TEST_CHECKSUM must be + defined.") +endif () +# cmake-format: on + +set(AOM_TEST_DATA_URL "https://storage.googleapis.com/aom-test-data") + +if(NOT AOM_TEST_DATA_PATH) + set(AOM_TEST_DATA_PATH "$ENV{LIBAOM_TEST_DATA_PATH}") +endif() + +if("${AOM_TEST_DATA_PATH}" STREQUAL "") + message( + WARNING "Writing test data to ${AOM_CONFIG_DIR}, set " + "$LIBAOM_TEST_DATA_PATH in your environment to avoid this warning.") + set(AOM_TEST_DATA_PATH "${AOM_CONFIG_DIR}") +endif() + +if(NOT EXISTS "${AOM_TEST_DATA_PATH}") + file(MAKE_DIRECTORY "${AOM_TEST_DATA_PATH}") +endif() + +expand_test_file_paths("AOM_TEST_FILE" "${AOM_TEST_DATA_PATH}" "filepath") +expand_test_file_paths("AOM_TEST_FILE" "${AOM_TEST_DATA_URL}" "url") + +check_file("${filepath}" "${AOM_TEST_CHECKSUM}" "needs_download") +if(needs_download) + download_test_file("${url}" "${AOM_TEST_CHECKSUM}" "${filepath}") +endif() diff --git a/third_party/aom/test/test_data_util.cmake b/third_party/aom/test/test_data_util.cmake new file mode 100644 index 0000000000..069e1ad526 --- /dev/null +++ b/third_party/aom/test/test_data_util.cmake @@ -0,0 +1,665 @@ +# +# Copyright (c) 2017, Alliance for Open Media. All rights reserved +# +# This source code is subject to the terms of the BSD 2 Clause License and the +# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was +# not distributed with this source code in the LICENSE file, you can obtain it +# at www.aomedia.org/license/software. If the Alliance for Open Media Patent +# License 1.0 was not distributed with this source code in the PATENTS file, you +# can obtain it at www.aomedia.org/license/patent. 
+# + +list(APPEND AOM_TEST_DATA_FILE_NAMES + "desktopqvga2.320_240.yuv" + "desktop1.320_180.yuv" + "hantro_collage_w176h144.yuv" + "hantro_collage_w352h288.yuv" + "hantro_collage_w352h288_nv12.yuv" + "hantro_odd.yuv" + "paris_352_288_30.y4m" + "park_joy_90p_10_420.y4m" + "park_joy_90p_10_422.y4m" + "park_joy_90p_10_444.y4m" + "park_joy_90p_12_420.y4m" + "park_joy_90p_12_422.y4m" + "park_joy_90p_12_444.y4m" + "park_joy_90p_8_420_a10-1.y4m" + "park_joy_90p_8_420.y4m" + "park_joy_90p_8_420_monochrome.y4m" + "park_joy_90p_8_420_vertical_csp.y4m" + "park_joy_90p_8_422.y4m" + "park_joy_90p_8_444.y4m" + "pixel_capture_w320h240.yuv" + "desktop_credits.y4m" + "rand_noise_w1280h720.yuv" + "niklas_1280_720_30.y4m" + "rush_hour_444.y4m" + "screendata.y4m" + "niklas_640_480_30.yuv" + "vase10x10.yuv" + "vase10x10_tiles.txt" + "bus_352x288_420_f20_b8.yuv" + "test_input_w1h1.yuv" + "crowd_run_360p_10_150f.y4m" + "wikipedia_420_360p_60f.y4m") + +if(ENABLE_DECODE_PERF_TESTS AND CONFIG_AV1_ENCODER) + list(APPEND AOM_TEST_DATA_FILE_NAMES "niklas_1280_720_30.yuv") +endif() + +if(CONFIG_AV1_DECODER) + list(APPEND AOM_TEST_DATA_FILE_NAMES + "av1-1-b8-00-quantizer-00.ivf" + "av1-1-b8-00-quantizer-00.ivf.md5" + "av1-1-b8-00-quantizer-01.ivf" + "av1-1-b8-00-quantizer-01.ivf.md5" + "av1-1-b8-00-quantizer-02.ivf" + "av1-1-b8-00-quantizer-02.ivf.md5" + "av1-1-b8-00-quantizer-03.ivf" + "av1-1-b8-00-quantizer-03.ivf.md5" + "av1-1-b8-00-quantizer-04.ivf" + "av1-1-b8-00-quantizer-04.ivf.md5" + "av1-1-b8-00-quantizer-05.ivf" + "av1-1-b8-00-quantizer-05.ivf.md5" + "av1-1-b8-00-quantizer-06.ivf" + "av1-1-b8-00-quantizer-06.ivf.md5" + "av1-1-b8-00-quantizer-07.ivf" + "av1-1-b8-00-quantizer-07.ivf.md5" + "av1-1-b8-00-quantizer-08.ivf" + "av1-1-b8-00-quantizer-08.ivf.md5" + "av1-1-b8-00-quantizer-09.ivf" + "av1-1-b8-00-quantizer-09.ivf.md5" + "av1-1-b8-00-quantizer-10.ivf" + "av1-1-b8-00-quantizer-10.ivf.md5" + "av1-1-b8-00-quantizer-11.ivf" + "av1-1-b8-00-quantizer-11.ivf.md5" + 
"av1-1-b8-00-quantizer-12.ivf" + "av1-1-b8-00-quantizer-12.ivf.md5" + "av1-1-b8-00-quantizer-13.ivf" + "av1-1-b8-00-quantizer-13.ivf.md5" + "av1-1-b8-00-quantizer-14.ivf" + "av1-1-b8-00-quantizer-14.ivf.md5" + "av1-1-b8-00-quantizer-15.ivf" + "av1-1-b8-00-quantizer-15.ivf.md5" + "av1-1-b8-00-quantizer-16.ivf" + "av1-1-b8-00-quantizer-16.ivf.md5" + "av1-1-b8-00-quantizer-17.ivf" + "av1-1-b8-00-quantizer-17.ivf.md5" + "av1-1-b8-00-quantizer-18.ivf" + "av1-1-b8-00-quantizer-18.ivf.md5" + "av1-1-b8-00-quantizer-19.ivf" + "av1-1-b8-00-quantizer-19.ivf.md5" + "av1-1-b8-00-quantizer-20.ivf" + "av1-1-b8-00-quantizer-20.ivf.md5" + "av1-1-b8-00-quantizer-21.ivf" + "av1-1-b8-00-quantizer-21.ivf.md5" + "av1-1-b8-00-quantizer-22.ivf" + "av1-1-b8-00-quantizer-22.ivf.md5" + "av1-1-b8-00-quantizer-23.ivf" + "av1-1-b8-00-quantizer-23.ivf.md5" + "av1-1-b8-00-quantizer-24.ivf" + "av1-1-b8-00-quantizer-24.ivf.md5" + "av1-1-b8-00-quantizer-25.ivf" + "av1-1-b8-00-quantizer-25.ivf.md5" + "av1-1-b8-00-quantizer-26.ivf" + "av1-1-b8-00-quantizer-26.ivf.md5" + "av1-1-b8-00-quantizer-27.ivf" + "av1-1-b8-00-quantizer-27.ivf.md5" + "av1-1-b8-00-quantizer-28.ivf" + "av1-1-b8-00-quantizer-28.ivf.md5" + "av1-1-b8-00-quantizer-29.ivf" + "av1-1-b8-00-quantizer-29.ivf.md5" + "av1-1-b8-00-quantizer-30.ivf" + "av1-1-b8-00-quantizer-30.ivf.md5" + "av1-1-b8-00-quantizer-31.ivf" + "av1-1-b8-00-quantizer-31.ivf.md5" + "av1-1-b8-00-quantizer-32.ivf" + "av1-1-b8-00-quantizer-32.ivf.md5" + "av1-1-b8-00-quantizer-33.ivf" + "av1-1-b8-00-quantizer-33.ivf.md5" + "av1-1-b8-00-quantizer-34.ivf" + "av1-1-b8-00-quantizer-34.ivf.md5" + "av1-1-b8-00-quantizer-35.ivf" + "av1-1-b8-00-quantizer-35.ivf.md5" + "av1-1-b8-00-quantizer-36.ivf" + "av1-1-b8-00-quantizer-36.ivf.md5" + "av1-1-b8-00-quantizer-37.ivf" + "av1-1-b8-00-quantizer-37.ivf.md5" + "av1-1-b8-00-quantizer-38.ivf" + "av1-1-b8-00-quantizer-38.ivf.md5" + "av1-1-b8-00-quantizer-39.ivf" + "av1-1-b8-00-quantizer-39.ivf.md5" + "av1-1-b8-00-quantizer-40.ivf" + 
"av1-1-b8-00-quantizer-40.ivf.md5" + "av1-1-b8-00-quantizer-41.ivf" + "av1-1-b8-00-quantizer-41.ivf.md5" + "av1-1-b8-00-quantizer-42.ivf" + "av1-1-b8-00-quantizer-42.ivf.md5" + "av1-1-b8-00-quantizer-43.ivf" + "av1-1-b8-00-quantizer-43.ivf.md5" + "av1-1-b8-00-quantizer-44.ivf" + "av1-1-b8-00-quantizer-44.ivf.md5" + "av1-1-b8-00-quantizer-45.ivf" + "av1-1-b8-00-quantizer-45.ivf.md5" + "av1-1-b8-00-quantizer-46.ivf" + "av1-1-b8-00-quantizer-46.ivf.md5" + "av1-1-b8-00-quantizer-47.ivf" + "av1-1-b8-00-quantizer-47.ivf.md5" + "av1-1-b8-00-quantizer-48.ivf" + "av1-1-b8-00-quantizer-48.ivf.md5" + "av1-1-b8-00-quantizer-49.ivf" + "av1-1-b8-00-quantizer-49.ivf.md5" + "av1-1-b8-00-quantizer-50.ivf" + "av1-1-b8-00-quantizer-50.ivf.md5" + "av1-1-b8-00-quantizer-51.ivf" + "av1-1-b8-00-quantizer-51.ivf.md5" + "av1-1-b8-00-quantizer-52.ivf" + "av1-1-b8-00-quantizer-52.ivf.md5" + "av1-1-b8-00-quantizer-53.ivf" + "av1-1-b8-00-quantizer-53.ivf.md5" + "av1-1-b8-00-quantizer-54.ivf" + "av1-1-b8-00-quantizer-54.ivf.md5" + "av1-1-b8-00-quantizer-55.ivf" + "av1-1-b8-00-quantizer-55.ivf.md5" + "av1-1-b8-00-quantizer-56.ivf" + "av1-1-b8-00-quantizer-56.ivf.md5" + "av1-1-b8-00-quantizer-57.ivf" + "av1-1-b8-00-quantizer-57.ivf.md5" + "av1-1-b8-00-quantizer-58.ivf" + "av1-1-b8-00-quantizer-58.ivf.md5" + "av1-1-b8-00-quantizer-59.ivf" + "av1-1-b8-00-quantizer-59.ivf.md5" + "av1-1-b8-00-quantizer-60.ivf" + "av1-1-b8-00-quantizer-60.ivf.md5" + "av1-1-b8-00-quantizer-61.ivf" + "av1-1-b8-00-quantizer-61.ivf.md5" + "av1-1-b8-00-quantizer-62.ivf" + "av1-1-b8-00-quantizer-62.ivf.md5" + "av1-1-b8-00-quantizer-63.ivf" + "av1-1-b8-00-quantizer-63.ivf.md5" + "av1-1-b10-00-quantizer-00.ivf" + "av1-1-b10-00-quantizer-00.ivf.md5" + "av1-1-b10-00-quantizer-01.ivf" + "av1-1-b10-00-quantizer-01.ivf.md5" + "av1-1-b10-00-quantizer-02.ivf" + "av1-1-b10-00-quantizer-02.ivf.md5" + "av1-1-b10-00-quantizer-03.ivf" + "av1-1-b10-00-quantizer-03.ivf.md5" + "av1-1-b10-00-quantizer-04.ivf" + 
"av1-1-b10-00-quantizer-04.ivf.md5" + "av1-1-b10-00-quantizer-05.ivf" + "av1-1-b10-00-quantizer-05.ivf.md5" + "av1-1-b10-00-quantizer-06.ivf" + "av1-1-b10-00-quantizer-06.ivf.md5" + "av1-1-b10-00-quantizer-07.ivf" + "av1-1-b10-00-quantizer-07.ivf.md5" + "av1-1-b10-00-quantizer-08.ivf" + "av1-1-b10-00-quantizer-08.ivf.md5" + "av1-1-b10-00-quantizer-09.ivf" + "av1-1-b10-00-quantizer-09.ivf.md5" + "av1-1-b10-00-quantizer-10.ivf" + "av1-1-b10-00-quantizer-10.ivf.md5" + "av1-1-b10-00-quantizer-11.ivf" + "av1-1-b10-00-quantizer-11.ivf.md5" + "av1-1-b10-00-quantizer-12.ivf" + "av1-1-b10-00-quantizer-12.ivf.md5" + "av1-1-b10-00-quantizer-13.ivf" + "av1-1-b10-00-quantizer-13.ivf.md5" + "av1-1-b10-00-quantizer-14.ivf" + "av1-1-b10-00-quantizer-14.ivf.md5" + "av1-1-b10-00-quantizer-15.ivf" + "av1-1-b10-00-quantizer-15.ivf.md5" + "av1-1-b10-00-quantizer-16.ivf" + "av1-1-b10-00-quantizer-16.ivf.md5" + "av1-1-b10-00-quantizer-17.ivf" + "av1-1-b10-00-quantizer-17.ivf.md5" + "av1-1-b10-00-quantizer-18.ivf" + "av1-1-b10-00-quantizer-18.ivf.md5" + "av1-1-b10-00-quantizer-19.ivf" + "av1-1-b10-00-quantizer-19.ivf.md5" + "av1-1-b10-00-quantizer-20.ivf" + "av1-1-b10-00-quantizer-20.ivf.md5" + "av1-1-b10-00-quantizer-21.ivf" + "av1-1-b10-00-quantizer-21.ivf.md5" + "av1-1-b10-00-quantizer-22.ivf" + "av1-1-b10-00-quantizer-22.ivf.md5" + "av1-1-b10-00-quantizer-23.ivf" + "av1-1-b10-00-quantizer-23.ivf.md5" + "av1-1-b10-00-quantizer-24.ivf" + "av1-1-b10-00-quantizer-24.ivf.md5" + "av1-1-b10-00-quantizer-25.ivf" + "av1-1-b10-00-quantizer-25.ivf.md5" + "av1-1-b10-00-quantizer-26.ivf" + "av1-1-b10-00-quantizer-26.ivf.md5" + "av1-1-b10-00-quantizer-27.ivf" + "av1-1-b10-00-quantizer-27.ivf.md5" + "av1-1-b10-00-quantizer-28.ivf" + "av1-1-b10-00-quantizer-28.ivf.md5" + "av1-1-b10-00-quantizer-29.ivf" + "av1-1-b10-00-quantizer-29.ivf.md5" + "av1-1-b10-00-quantizer-30.ivf" + "av1-1-b10-00-quantizer-30.ivf.md5" + "av1-1-b10-00-quantizer-31.ivf" + "av1-1-b10-00-quantizer-31.ivf.md5" + 
"av1-1-b10-00-quantizer-32.ivf" + "av1-1-b10-00-quantizer-32.ivf.md5" + "av1-1-b10-00-quantizer-33.ivf" + "av1-1-b10-00-quantizer-33.ivf.md5" + "av1-1-b10-00-quantizer-34.ivf" + "av1-1-b10-00-quantizer-34.ivf.md5" + "av1-1-b10-00-quantizer-35.ivf" + "av1-1-b10-00-quantizer-35.ivf.md5" + "av1-1-b10-00-quantizer-36.ivf" + "av1-1-b10-00-quantizer-36.ivf.md5" + "av1-1-b10-00-quantizer-37.ivf" + "av1-1-b10-00-quantizer-37.ivf.md5" + "av1-1-b10-00-quantizer-38.ivf" + "av1-1-b10-00-quantizer-38.ivf.md5" + "av1-1-b10-00-quantizer-39.ivf" + "av1-1-b10-00-quantizer-39.ivf.md5" + "av1-1-b10-00-quantizer-40.ivf" + "av1-1-b10-00-quantizer-40.ivf.md5" + "av1-1-b10-00-quantizer-41.ivf" + "av1-1-b10-00-quantizer-41.ivf.md5" + "av1-1-b10-00-quantizer-42.ivf" + "av1-1-b10-00-quantizer-42.ivf.md5" + "av1-1-b10-00-quantizer-43.ivf" + "av1-1-b10-00-quantizer-43.ivf.md5" + "av1-1-b10-00-quantizer-44.ivf" + "av1-1-b10-00-quantizer-44.ivf.md5" + "av1-1-b10-00-quantizer-45.ivf" + "av1-1-b10-00-quantizer-45.ivf.md5" + "av1-1-b10-00-quantizer-46.ivf" + "av1-1-b10-00-quantizer-46.ivf.md5" + "av1-1-b10-00-quantizer-47.ivf" + "av1-1-b10-00-quantizer-47.ivf.md5" + "av1-1-b10-00-quantizer-48.ivf" + "av1-1-b10-00-quantizer-48.ivf.md5" + "av1-1-b10-00-quantizer-49.ivf" + "av1-1-b10-00-quantizer-49.ivf.md5" + "av1-1-b10-00-quantizer-50.ivf" + "av1-1-b10-00-quantizer-50.ivf.md5" + "av1-1-b10-00-quantizer-51.ivf" + "av1-1-b10-00-quantizer-51.ivf.md5" + "av1-1-b10-00-quantizer-52.ivf" + "av1-1-b10-00-quantizer-52.ivf.md5" + "av1-1-b10-00-quantizer-53.ivf" + "av1-1-b10-00-quantizer-53.ivf.md5" + "av1-1-b10-00-quantizer-54.ivf" + "av1-1-b10-00-quantizer-54.ivf.md5" + "av1-1-b10-00-quantizer-55.ivf" + "av1-1-b10-00-quantizer-55.ivf.md5" + "av1-1-b10-00-quantizer-56.ivf" + "av1-1-b10-00-quantizer-56.ivf.md5" + "av1-1-b10-00-quantizer-57.ivf" + "av1-1-b10-00-quantizer-57.ivf.md5" + "av1-1-b10-00-quantizer-58.ivf" + "av1-1-b10-00-quantizer-58.ivf.md5" + "av1-1-b10-00-quantizer-59.ivf" + 
"av1-1-b10-00-quantizer-59.ivf.md5" + "av1-1-b10-00-quantizer-60.ivf" + "av1-1-b10-00-quantizer-60.ivf.md5" + "av1-1-b10-00-quantizer-61.ivf" + "av1-1-b10-00-quantizer-61.ivf.md5" + "av1-1-b10-00-quantizer-62.ivf" + "av1-1-b10-00-quantizer-62.ivf.md5" + "av1-1-b10-00-quantizer-63.ivf" + "av1-1-b10-00-quantizer-63.ivf.md5" + "av1-1-b10-23-film_grain-50.ivf" + "av1-1-b10-23-film_grain-50.ivf.md5" + "av1-1-b10-24-monochrome.ivf" + "av1-1-b10-24-monochrome.ivf.md5" + "av1-1-b8-01-size-16x16.ivf" + "av1-1-b8-01-size-16x16.ivf.md5" + "av1-1-b8-01-size-16x18.ivf" + "av1-1-b8-01-size-16x18.ivf.md5" + "av1-1-b8-01-size-16x32.ivf" + "av1-1-b8-01-size-16x32.ivf.md5" + "av1-1-b8-01-size-16x34.ivf" + "av1-1-b8-01-size-16x34.ivf.md5" + "av1-1-b8-01-size-16x64.ivf" + "av1-1-b8-01-size-16x64.ivf.md5" + "av1-1-b8-01-size-16x66.ivf" + "av1-1-b8-01-size-16x66.ivf.md5" + "av1-1-b8-01-size-18x16.ivf" + "av1-1-b8-01-size-18x16.ivf.md5" + "av1-1-b8-01-size-18x18.ivf" + "av1-1-b8-01-size-18x18.ivf.md5" + "av1-1-b8-01-size-18x32.ivf" + "av1-1-b8-01-size-18x32.ivf.md5" + "av1-1-b8-01-size-18x34.ivf" + "av1-1-b8-01-size-18x34.ivf.md5" + "av1-1-b8-01-size-18x64.ivf" + "av1-1-b8-01-size-18x64.ivf.md5" + "av1-1-b8-01-size-18x66.ivf" + "av1-1-b8-01-size-18x66.ivf.md5" + "av1-1-b8-01-size-196x196.ivf" + "av1-1-b8-01-size-196x196.ivf.md5" + "av1-1-b8-01-size-196x198.ivf" + "av1-1-b8-01-size-196x198.ivf.md5" + "av1-1-b8-01-size-196x200.ivf" + "av1-1-b8-01-size-196x200.ivf.md5" + "av1-1-b8-01-size-196x202.ivf" + "av1-1-b8-01-size-196x202.ivf.md5" + "av1-1-b8-01-size-196x208.ivf" + "av1-1-b8-01-size-196x208.ivf.md5" + "av1-1-b8-01-size-196x210.ivf" + "av1-1-b8-01-size-196x210.ivf.md5" + "av1-1-b8-01-size-196x224.ivf" + "av1-1-b8-01-size-196x224.ivf.md5" + "av1-1-b8-01-size-196x226.ivf" + "av1-1-b8-01-size-196x226.ivf.md5" + "av1-1-b8-01-size-198x196.ivf" + "av1-1-b8-01-size-198x196.ivf.md5" + "av1-1-b8-01-size-198x198.ivf" + "av1-1-b8-01-size-198x198.ivf.md5" + "av1-1-b8-01-size-198x200.ivf" + 
"av1-1-b8-01-size-198x200.ivf.md5" + "av1-1-b8-01-size-198x202.ivf" + "av1-1-b8-01-size-198x202.ivf.md5" + "av1-1-b8-01-size-198x208.ivf" + "av1-1-b8-01-size-198x208.ivf.md5" + "av1-1-b8-01-size-198x210.ivf" + "av1-1-b8-01-size-198x210.ivf.md5" + "av1-1-b8-01-size-198x224.ivf" + "av1-1-b8-01-size-198x224.ivf.md5" + "av1-1-b8-01-size-198x226.ivf" + "av1-1-b8-01-size-198x226.ivf.md5" + "av1-1-b8-01-size-200x196.ivf" + "av1-1-b8-01-size-200x196.ivf.md5" + "av1-1-b8-01-size-200x198.ivf" + "av1-1-b8-01-size-200x198.ivf.md5" + "av1-1-b8-01-size-200x200.ivf" + "av1-1-b8-01-size-200x200.ivf.md5" + "av1-1-b8-01-size-200x202.ivf" + "av1-1-b8-01-size-200x202.ivf.md5" + "av1-1-b8-01-size-200x208.ivf" + "av1-1-b8-01-size-200x208.ivf.md5" + "av1-1-b8-01-size-200x210.ivf" + "av1-1-b8-01-size-200x210.ivf.md5" + "av1-1-b8-01-size-200x224.ivf" + "av1-1-b8-01-size-200x224.ivf.md5" + "av1-1-b8-01-size-200x226.ivf" + "av1-1-b8-01-size-200x226.ivf.md5" + "av1-1-b8-01-size-202x196.ivf" + "av1-1-b8-01-size-202x196.ivf.md5" + "av1-1-b8-01-size-202x198.ivf" + "av1-1-b8-01-size-202x198.ivf.md5" + "av1-1-b8-01-size-202x200.ivf" + "av1-1-b8-01-size-202x200.ivf.md5" + "av1-1-b8-01-size-202x202.ivf" + "av1-1-b8-01-size-202x202.ivf.md5" + "av1-1-b8-01-size-202x208.ivf" + "av1-1-b8-01-size-202x208.ivf.md5" + "av1-1-b8-01-size-202x210.ivf" + "av1-1-b8-01-size-202x210.ivf.md5" + "av1-1-b8-01-size-202x224.ivf" + "av1-1-b8-01-size-202x224.ivf.md5" + "av1-1-b8-01-size-202x226.ivf" + "av1-1-b8-01-size-202x226.ivf.md5" + "av1-1-b8-01-size-208x196.ivf" + "av1-1-b8-01-size-208x196.ivf.md5" + "av1-1-b8-01-size-208x198.ivf" + "av1-1-b8-01-size-208x198.ivf.md5" + "av1-1-b8-01-size-208x200.ivf" + "av1-1-b8-01-size-208x200.ivf.md5" + "av1-1-b8-01-size-208x202.ivf" + "av1-1-b8-01-size-208x202.ivf.md5" + "av1-1-b8-01-size-208x208.ivf" + "av1-1-b8-01-size-208x208.ivf.md5" + "av1-1-b8-01-size-208x210.ivf" + "av1-1-b8-01-size-208x210.ivf.md5" + "av1-1-b8-01-size-208x224.ivf" + "av1-1-b8-01-size-208x224.ivf.md5" + 
"av1-1-b8-01-size-208x226.ivf" + "av1-1-b8-01-size-208x226.ivf.md5" + "av1-1-b8-01-size-210x196.ivf" + "av1-1-b8-01-size-210x196.ivf.md5" + "av1-1-b8-01-size-210x198.ivf" + "av1-1-b8-01-size-210x198.ivf.md5" + "av1-1-b8-01-size-210x200.ivf" + "av1-1-b8-01-size-210x200.ivf.md5" + "av1-1-b8-01-size-210x202.ivf" + "av1-1-b8-01-size-210x202.ivf.md5" + "av1-1-b8-01-size-210x208.ivf" + "av1-1-b8-01-size-210x208.ivf.md5" + "av1-1-b8-01-size-210x210.ivf" + "av1-1-b8-01-size-210x210.ivf.md5" + "av1-1-b8-01-size-210x224.ivf" + "av1-1-b8-01-size-210x224.ivf.md5" + "av1-1-b8-01-size-210x226.ivf" + "av1-1-b8-01-size-210x226.ivf.md5" + "av1-1-b8-01-size-224x196.ivf" + "av1-1-b8-01-size-224x196.ivf.md5" + "av1-1-b8-01-size-224x198.ivf" + "av1-1-b8-01-size-224x198.ivf.md5" + "av1-1-b8-01-size-224x200.ivf" + "av1-1-b8-01-size-224x200.ivf.md5" + "av1-1-b8-01-size-224x202.ivf" + "av1-1-b8-01-size-224x202.ivf.md5" + "av1-1-b8-01-size-224x208.ivf" + "av1-1-b8-01-size-224x208.ivf.md5" + "av1-1-b8-01-size-224x210.ivf" + "av1-1-b8-01-size-224x210.ivf.md5" + "av1-1-b8-01-size-224x224.ivf" + "av1-1-b8-01-size-224x224.ivf.md5" + "av1-1-b8-01-size-224x226.ivf" + "av1-1-b8-01-size-224x226.ivf.md5" + "av1-1-b8-01-size-226x196.ivf" + "av1-1-b8-01-size-226x196.ivf.md5" + "av1-1-b8-01-size-226x198.ivf" + "av1-1-b8-01-size-226x198.ivf.md5" + "av1-1-b8-01-size-226x200.ivf" + "av1-1-b8-01-size-226x200.ivf.md5" + "av1-1-b8-01-size-226x202.ivf" + "av1-1-b8-01-size-226x202.ivf.md5" + "av1-1-b8-01-size-226x208.ivf" + "av1-1-b8-01-size-226x208.ivf.md5" + "av1-1-b8-01-size-226x210.ivf" + "av1-1-b8-01-size-226x210.ivf.md5" + "av1-1-b8-01-size-226x224.ivf" + "av1-1-b8-01-size-226x224.ivf.md5" + "av1-1-b8-01-size-226x226.ivf" + "av1-1-b8-01-size-226x226.ivf.md5" + "av1-1-b8-01-size-32x16.ivf" + "av1-1-b8-01-size-32x16.ivf.md5" + "av1-1-b8-01-size-32x18.ivf" + "av1-1-b8-01-size-32x18.ivf.md5" + "av1-1-b8-01-size-32x32.ivf" + "av1-1-b8-01-size-32x32.ivf.md5" + "av1-1-b8-01-size-32x34.ivf" + 
"av1-1-b8-01-size-32x34.ivf.md5" + "av1-1-b8-01-size-32x64.ivf" + "av1-1-b8-01-size-32x64.ivf.md5" + "av1-1-b8-01-size-32x66.ivf" + "av1-1-b8-01-size-32x66.ivf.md5" + "av1-1-b8-01-size-34x16.ivf" + "av1-1-b8-01-size-34x16.ivf.md5" + "av1-1-b8-01-size-34x18.ivf" + "av1-1-b8-01-size-34x18.ivf.md5" + "av1-1-b8-01-size-34x32.ivf" + "av1-1-b8-01-size-34x32.ivf.md5" + "av1-1-b8-01-size-34x34.ivf" + "av1-1-b8-01-size-34x34.ivf.md5" + "av1-1-b8-01-size-34x64.ivf" + "av1-1-b8-01-size-34x64.ivf.md5" + "av1-1-b8-01-size-34x66.ivf" + "av1-1-b8-01-size-34x66.ivf.md5" + "av1-1-b8-01-size-64x16.ivf" + "av1-1-b8-01-size-64x16.ivf.md5" + "av1-1-b8-01-size-64x18.ivf" + "av1-1-b8-01-size-64x18.ivf.md5" + "av1-1-b8-01-size-64x32.ivf" + "av1-1-b8-01-size-64x32.ivf.md5" + "av1-1-b8-01-size-64x34.ivf" + "av1-1-b8-01-size-64x34.ivf.md5" + "av1-1-b8-01-size-64x64.ivf" + "av1-1-b8-01-size-64x64.ivf.md5" + "av1-1-b8-01-size-64x66.ivf" + "av1-1-b8-01-size-64x66.ivf.md5" + "av1-1-b8-01-size-66x16.ivf" + "av1-1-b8-01-size-66x16.ivf.md5" + "av1-1-b8-01-size-66x18.ivf" + "av1-1-b8-01-size-66x18.ivf.md5" + "av1-1-b8-01-size-66x32.ivf" + "av1-1-b8-01-size-66x32.ivf.md5" + "av1-1-b8-01-size-66x34.ivf" + "av1-1-b8-01-size-66x34.ivf.md5" + "av1-1-b8-01-size-66x64.ivf" + "av1-1-b8-01-size-66x64.ivf.md5" + "av1-1-b8-01-size-66x66.ivf" + "av1-1-b8-01-size-66x66.ivf.md5" + "av1-1-b8-02-allintra.ivf" + "av1-1-b8-02-allintra.ivf.md5" + "av1-1-b8-03-sizeup.mkv" + "av1-1-b8-03-sizeup.mkv.md5" + "av1-1-b8-03-sizedown.mkv" + "av1-1-b8-03-sizedown.mkv.md5" + "av1-1-b8-04-cdfupdate.ivf" + "av1-1-b8-04-cdfupdate.ivf.md5" + "av1-1-b8-05-mv.ivf" + "av1-1-b8-05-mv.ivf.md5" + "av1-1-b8-06-mfmv.ivf" + "av1-1-b8-06-mfmv.ivf.md5" + "av1-1-b8-16-intra_only-intrabc-extreme-dv.ivf" + "av1-1-b8-16-intra_only-intrabc-extreme-dv.ivf.md5" + "av1-1-b8-22-svc-L2T1.ivf" + "av1-1-b8-22-svc-L2T1.ivf.md5" + "av1-1-b8-22-svc-L1T2.ivf" + "av1-1-b8-22-svc-L1T2.ivf.md5" + "av1-1-b8-22-svc-L2T2.ivf" + "av1-1-b8-22-svc-L2T2.ivf.md5" + 
"av1-1-b8-23-film_grain-50.ivf" + "av1-1-b8-23-film_grain-50.ivf.md5" + "av1-1-b8-24-monochrome.ivf" + "av1-1-b8-24-monochrome.ivf.md5" + "invalid-bug-1814.ivf" + "invalid-bug-1814.ivf.res" + "invalid-chromium-906381.ivf" + "invalid-chromium-906381.ivf.res" + "invalid-google-142530197-1.ivf" + "invalid-google-142530197-1.ivf.res" + "invalid-google-142530197.ivf" + "invalid-google-142530197.ivf.res" + "invalid-oss-fuzz-10061.ivf" + "invalid-oss-fuzz-10061.ivf.res" + "invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf" + "invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf.res" + "invalid-oss-fuzz-10227.ivf" + "invalid-oss-fuzz-10227.ivf.res" + "invalid-oss-fuzz-10389.ivf" + "invalid-oss-fuzz-10389.ivf.res.4" + "invalid-oss-fuzz-10555.ivf" + "invalid-oss-fuzz-10555.ivf.res" + "invalid-oss-fuzz-10705.ivf" + "invalid-oss-fuzz-10705.ivf.res" + "invalid-oss-fuzz-10723.ivf" + "invalid-oss-fuzz-10723.ivf.res.2" + "invalid-oss-fuzz-10723.ivf.res.3" + "invalid-oss-fuzz-10779.ivf" + "invalid-oss-fuzz-10779.ivf.res" + "invalid-oss-fuzz-11477.ivf" + "invalid-oss-fuzz-11477.ivf.res" + "invalid-oss-fuzz-11479.ivf" + "invalid-oss-fuzz-11479.ivf.res.2" + "invalid-oss-fuzz-11523.ivf" + "invalid-oss-fuzz-11523.ivf.res.2" + "invalid-oss-fuzz-15363.ivf" + "invalid-oss-fuzz-15363.ivf.res" + "invalid-oss-fuzz-16437.ivf" + "invalid-oss-fuzz-16437.ivf.res.2" + "invalid-oss-fuzz-24706.ivf" + "invalid-oss-fuzz-24706.ivf.res" + "invalid-oss-fuzz-33030.ivf" + "invalid-oss-fuzz-33030.ivf.res" + "invalid-oss-fuzz-9288.ivf" + "invalid-oss-fuzz-9288.ivf.res" + "invalid-oss-fuzz-9463.ivf" + "invalid-oss-fuzz-9463.ivf.res.2" + "invalid-oss-fuzz-9482.ivf" + "invalid-oss-fuzz-9482.ivf.res" + "invalid-oss-fuzz-9720.ivf" + "invalid-oss-fuzz-9720.ivf.res") +endif() + +if(ENABLE_ENCODE_PERF_TESTS AND CONFIG_AV1_ENCODER) + list(APPEND AOM_TEST_DATA_FILE_NAMES "desktop_640_360_30.yuv" + "kirland_640_480_30.yuv" "macmarcomoving_640_480_30.yuv" + "macmarcostationary_640_480_30.yuv" "niklas_1280_720_30.yuv" + 
"tacomanarrows_640_480_30.yuv" + "tacomasmallcameramovement_640_480_30.yuv" + "thaloundeskmtg_640_480_30.yuv") +endif() + +# Parses test/test-data.sha1 and writes captured file names and checksums to +# $out_files and $out_checksums as lists. +function(make_test_data_lists test_data_file out_files out_checksums) + if(NOT test_data_file OR NOT EXISTS "${test_data_file}") + message(FATAL_ERROR "Test info file missing or empty (${test_data_file})") + endif() + + # Read $test_data_file into $files_and_checksums. $files_and_checksums becomes + # a list with an entry for each line from $test_data_file. + file(STRINGS "${test_data_file}" files_and_checksums) + + # Iterate over the list of lines and split it into $checksums and $filenames. + foreach(line ${files_and_checksums}) + string(FIND "${line}" " *" delim_pos) + + math(EXPR filename_pos "${delim_pos} + 2") + string(SUBSTRING "${line}" 0 ${delim_pos} checksum) + string(SUBSTRING "${line}" ${filename_pos} -1 filename) + + list(FIND AOM_TEST_DATA_FILE_NAMES ${filename} list_index) + if(NOT ${list_index} EQUAL -1) + + # Include the name and checksum in output only when the file is needed. + set(checksums ${checksums} ${checksum}) + set(filenames ${filenames} ${filename}) + endif() + endforeach() + + list(LENGTH filenames num_files) + list(LENGTH checksums num_checksums) + if(NOT checksums OR NOT filenames OR NOT num_files EQUAL num_checksums) + message(FATAL_ERROR "Parsing of ${test_data_file} failed.") + endif() + + set(${out_checksums} ${checksums} PARENT_SCOPE) + set(${out_files} ${filenames} PARENT_SCOPE) +endfunction() + +# Appends each file name in $test_files to $test_dir and adds the result path to +# $out_path_list. 
function(expand_test_file_paths test_files test_dir out_path_list)
  # $test_files holds the NAME of a list variable, hence the double expansion.
  foreach(test_file ${${test_files}})
    list(APPEND path_list "${test_dir}/${test_file}")
  endforeach()
  set(${out_path_list} ${path_list} PARENT_SCOPE)
endfunction()

# Compares the SHA1 of $local_path with $expected_checksum. Sets
# $out_needs_update to 1 in the caller's scope when the file does not exist or
# its checksum differs; otherwise unsets $out_needs_update in the caller's scope
# and reports the file as up to date.
function(check_file local_path expected_checksum out_needs_update)
  # A missing file always needs to be fetched.
  if(NOT EXISTS "${local_path}")
    set(${out_needs_update} 1 PARENT_SCOPE)
    return()
  endif()

  file(SHA1 "${local_path}" file_checksum)
  if(NOT "${file_checksum}" STREQUAL "${expected_checksum}")
    set(${out_needs_update} 1 PARENT_SCOPE)
    return()
  endif()

  unset(${out_needs_update} PARENT_SCOPE)
  message("${local_path} up to date.")
endfunction()

# Fetches $file_url into $local_path, showing progress, and has file(DOWNLOAD)
# check the result against the SHA1 given in $file_checksum.
function(download_test_file file_url file_checksum local_path)
  message("Downloading ${file_url} ...")
  file(DOWNLOAD
       "${file_url}"
       "${local_path}"
       SHOW_PROGRESS
       EXPECTED_HASH SHA1=${file_checksum})
  message("Download of ${file_url} complete.")
endfunction()
diff --git a/third_party/aom/test/test_intra_pred_speed.cc b/third_party/aom/test/test_intra_pred_speed.cc new file mode 100644 index 0000000000..d5c94be092 --- /dev/null +++ b/third_party/aom/test/test_intra_pred_speed.cc @@ -0,0 +1,1742 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +// Test and time AOM intra-predictor functions + +#include <stdio.h> +#include <string> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/md5_helper.h" +#include "aom/aom_integer.h" +#include "aom_ports/mem.h" +#include "aom_ports/aom_timer.h" +#include "av1/common/common_data.h" + +// ----------------------------------------------------------------------------- + +namespace { + +// Note: +// APPLY_UNIT_TESTS +// 1: Do unit tests +// 0: Generate MD5 array as required +#define APPLY_UNIT_TESTS 1 + +typedef void (*AvxPredFunc)(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left); + +const int kBPS = 64; +const int kTotalPixels = kBPS * kBPS; +// 4 DC variants, V, H, PAETH, SMOOTH, SMOOTH_V, SMOOTH_H +const int kNumAv1IntraFuncs = 10; + +#if APPLY_UNIT_TESTS +const char *kAv1IntraPredNames[kNumAv1IntraFuncs] = { + "DC_PRED", "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED", + "H_PRED", "PAETH_PRED", "SMOOTH_PRED", "SMOOTH_V_PRED", "SMOOTH_H_PRED", +}; +#endif // APPLY_UNIT_TESTS + +template <typename Pixel> +struct IntraPredTestMem { + void Init(int block_width, int block_height, int bd) { + ASSERT_LE(block_width, kBPS); + ASSERT_LE(block_height, kBPS); + // Note: for blocks having width <= 32 and height <= 32, we generate 32x32 + // random pixels as before to avoid having to recalculate all hashes again. + const int block_size_upto_32 = (block_width <= 32) && (block_height <= 32); + stride = block_size_upto_32 ? 
32 : kBPS; + num_pixels = stride * stride; + libaom_test::ACMRandom rnd(libaom_test::ACMRandom::DeterministicSeed()); + above = above_mem + 16; + const int mask = (1 << bd) - 1; + for (int i = 0; i < num_pixels; ++i) ref_src[i] = rnd.Rand16() & mask; + for (int i = 0; i < stride; ++i) left[i] = rnd.Rand16() & mask; + for (int i = -1; i < stride; ++i) above[i] = rnd.Rand16() & mask; + + for (int i = stride; i < 2 * stride; ++i) { + left[i] = rnd.Rand16() & mask; + above[i] = rnd.Rand16() & mask; + } + } + + DECLARE_ALIGNED(16, Pixel, src[kTotalPixels]); + DECLARE_ALIGNED(16, Pixel, ref_src[kTotalPixels]); + DECLARE_ALIGNED(16, Pixel, left[2 * kBPS]); + Pixel *above; + int stride; + int num_pixels; + + private: + DECLARE_ALIGNED(16, Pixel, above_mem[2 * kBPS + 16]); +}; + +// ----------------------------------------------------------------------------- +// Low Bittdepth + +typedef IntraPredTestMem<uint8_t> Av1IntraPredTestMem; + +static const char *const kTxSizeStrings[TX_SIZES_ALL] = { + "4X4", "8X8", "16X16", "32X32", "64X64", "4X8", "8X4", + "8X16", "16X8", "16X32", "32X16", "32X64", "64X32", "4X16", + "16X4", "8X32", "32X8", "16X64", "64X16", +}; + +void CheckMd5Signature(TX_SIZE tx_size, bool is_hbd, + const char *const signatures[], const void *data, + size_t data_size, int elapsed_time, int idx) { + const std::string hbd_str = is_hbd ? 
"Hbd " : ""; + const std::string name_str = hbd_str + "Intra" + kTxSizeStrings[tx_size]; + libaom_test::MD5 md5; + md5.Add(reinterpret_cast<const uint8_t *>(data), data_size); +#if APPLY_UNIT_TESTS + printf("Mode %s[%13s]: %5d ms MD5: %s\n", name_str.c_str(), + kAv1IntraPredNames[idx], elapsed_time, md5.Get()); + EXPECT_STREQ(signatures[idx], md5.Get()); +#else + (void)signatures; + (void)elapsed_time; + (void)idx; + printf("\"%s\",\n", md5.Get()); +#endif +} + +void TestIntraPred(TX_SIZE tx_size, AvxPredFunc const *pred_funcs, + const char *const signatures[]) { + const int block_width = tx_size_wide[tx_size]; + const int block_height = tx_size_high[tx_size]; + const int num_pixels_per_test = + block_width * block_height * kNumAv1IntraFuncs; + const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test); + Av1IntraPredTestMem intra_pred_test_mem; + intra_pred_test_mem.Init(block_width, block_height, 8); + + for (int k = 0; k < kNumAv1IntraFuncs; ++k) { + if (pred_funcs[k] == nullptr) continue; + memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src, + sizeof(intra_pred_test_mem.src)); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int num_tests = 0; num_tests < kNumTests; ++num_tests) { + pred_funcs[k](intra_pred_test_mem.src, intra_pred_test_mem.stride, + intra_pred_test_mem.above, intra_pred_test_mem.left); + } + aom_usec_timer_mark(&timer); + const int elapsed_time = + static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000); + CheckMd5Signature( + tx_size, false, signatures, intra_pred_test_mem.src, + intra_pred_test_mem.num_pixels * sizeof(*intra_pred_test_mem.src), + elapsed_time, k); + } +} + +static const char *const kSignatures[TX_SIZES_ALL][kNumAv1IntraFuncs] = { + { + // 4X4 + "e7ed7353c3383fff942e500e9bfe82fe", + "2a4a26fcc6ce005eadc08354d196c8a9", + "269d92eff86f315d9c38fe7640d85b15", + "ae2960eea9f71ee3dabe08b282ec1773", + "6c1abcc44e90148998b51acd11144e9c", + "f7bb3186e1ef8a2b326037ff898cad8e", + 
"59fc0e923a08cfac0a493fb38988e2bb", + "9ff8bb37d9c830e6ab8ecb0c435d3c91", + "de6937fca02354f2874dbc5dbec5d5b3", + "723cf948137f7d8c7860d814e55ae67d", + }, + { + // 8X8 + "d8bbae5d6547cfc17e4f5f44c8730e88", + "373bab6d931868d41a601d9d88ce9ac3", + "6fdd5ff4ff79656c14747598ca9e3706", + "d9661c2811d6a73674f40ffb2b841847", + "7c722d10b19ccff0b8c171868e747385", + "f81dd986eb2b50f750d3a7da716b7e27", + "064404361748dd111a890a1470d7f0ea", + "dc29b7e1f78cc8e7525d5ea4c0ab9b78", + "97111eb1bc26bade6272015df829f1ae", + "d19a8a73cc46b807f2c5e817576cc1e1", + }, + { + // 16X16 + "50971c07ce26977d30298538fffec619", + "527a6b9e0dc5b21b98cf276305432bef", + "7eff2868f80ebc2c43a4f367281d80f7", + "67cd60512b54964ef6aff1bd4816d922", + "48371c87dc95c08a33b2048f89cf6468", + "b0acf2872ee411d7530af6d2625a7084", + "93d6b5352b571805ab16a55e1bbed86a", + "03764e4c0aebbc180e4e2c68fb06df2b", + "bb6c74c9076c9f266ab11fb57060d8e6", + "0c5162bc28489756ddb847b5678e6f07", + }, + { + // 32X32 + "a0a618c900e65ae521ccc8af789729f2", + "985aaa7c72b4a6c2fb431d32100cf13a", + "10662d09febc3ca13ee4e700120daeb5", + "b3b01379ba08916ef6b1b35f7d9ad51c", + "9f4261755795af97e34679c333ec7004", + "bc2c9da91ad97ef0d1610fb0a9041657", + "ef1653982b69e1f64bee3759f3e1ec45", + "1a51a675deba2c83282142eb48d3dc3d", + "866c224746dc260cda861a7b1b383fb3", + "cea23799fc3526e1b6a6ff02b42b82af", + }, + { + // 64X64 + "6e1094fa7b50bc813aa2ba29f5df8755", + "afe020786b83b793c2bbd9468097ff6e", + "be91585259bc37bf4dc1651936e90b3e", + "a1650dbcd56e10288c3e269eca37967d", + "9e5c34f3797e0cdd3cd9d4c05b0d8950", + "bc87be7ac899cc6a28f399d7516c49fe", + "9811fd0d2dd515f06122f5d1bd18b784", + "3c140e466f2c2c0d9cb7d2157ab8dc27", + "9543de76c925a8f6adc884cc7f98dc91", + "df1df0376cc944afe7e74e94f53e575a", + }, + { + // 4X8 + "d9fbebdc85f71ab1e18461b2db4a2adc", + "5ccb2a68284bc9714d94b8a06ccadbb2", + "735d059abc2744f3ff3f9590f7191b37", + "d9fbebdc85f71ab1e18461b2db4a2adc", + "6819497c44cd0ace120add83672996ee", + "7e3244f5a2d3edf81c7e962a842b97f9", + 
"809350f164cd4d1650850bb0f59c3260", + "1b60a394331eeab6927a6f8aaff57040", + "5307de1bd7329ba6b281d2c1b0b457f9", + "24c58a8138339846d95568efb91751db", + }, + { + // 8X4 + "23f9fc11344426c9bee2e06d57dfd628", + "2d71a26d1bae1fb34734de7b42fc5eb7", + "5af9c1b2fd9d5721fad67b67b3f7c816", + "00d71b17be662753813d515f197d145e", + "bef10ec984427e28f4390f43809d10af", + "77773cdfb7ed6bc882ab202a64b0a470", + "2cc48bd66d6b0121b5221d52ccd732af", + "b302155e1c9eeeafe2ba2bf68e807a46", + "561bc8d0e76d5041ebd5168fc6a115e1", + "81d0113fb1d0a9a24ffd6f1987b77948", + }, + { + // 8X16 + "c849de88b24f773dfcdd1d48d1209796", + "6cb807c1897b94866a0f3d3c56ed8695", + "d56db05a8ac7981762f5b877f486c4ef", + "b4bc01eb6e59a40922ad17715cafb04b", + "09d178439534f4062ae687c351f66d64", + "644501399cf73080ac606e5cef7ca09b", + "278076495180e17c065a95ab7278539a", + "9dd7f324816f242be408ffeb0c673732", + "f520c4a20acfa0bea1d253c6f0f040fd", + "85f38df809df2c2d7c8b4a157a65cd44", + }, + { + // 16X8 + "b4cbdbdf10ce13300b4063a3daf99e04", + "3731e1e6202064a9d0604d7c293ecee4", + "6c856188c4256a06452f0d5d70cac436", + "1f2192b4c8c497589484ea7bf9c944e8", + "84011bd4b7f565119d06787840e333a0", + "0e48949f7a6aa36f0d76b5d01f91124a", + "60eff8064634b6c73b10681356baeee9", + "1559aeb081a9c0c71111d6093c2ff9fd", + "c15479b739713773e5cabb748451987b", + "72e33ec12c9b67aea26d8d005fb82de2", + }, + { + // 16X32 + "abe5233d189cdbf79424721571bbaa7b", + "282759f81e3cfb2e2d396fe406b72a8b", + "e2224926c264f6f174cbc3167a233168", + "6814e85c2b33f8c9415d62e80394b47b", + "99cbbb60459c08a3061d72c4e4f6276a", + "1d1567d40b8e816f8c1f71e576fe0f87", + "36fdd371b624a075814d497c4832ec85", + "8ab8da61b727442b6ff692b40d0df018", + "e35a10ad7fdf2327e821504a90f6a6eb", + "1f7211e727dc1de7d6a55d082fbdd821", + }, + { + // 32X16 + "d1aeb8d5fdcfd3307922af01a798a4dc", + "b0bcb514ebfbee065faea9d34c12ae75", + "d6a18c63b4e909871c0137ca652fad23", + "fd047f2fc1b8ffb95d0eeef3e8796a45", + "645ab60779ea348fd93c81561c31bab9", + "4409633c9db8dff41ade4292a3a56e7f", + 
"5e36a11e069b31c2a739f3a9c7b37c24", + "e83b9483d702cfae496991c3c7fa92c0", + "12f6ddf98c7f30a277307f1ea935b030", + "354321d6c32bbdb0739e4fa2acbf41e1", + }, + { + // 32X64 + "0ce332b343934b34cd4417725faa85cb", + "4e2a2cfd8f56f15939bdfc753145b303", + "0f46d124ba9f48cdd5d5290acf786d6d", + "e1e8ed803236367821981500a3d9eebe", + "1d2f8e48e3adb7c448be05d9f66f4954", + "9fb2e176636a5689b26f73ca73fcc512", + "e720ebccae7e25e36f23da53ae5b5d6a", + "86fe4364734169aaa4520d799890d530", + "b1870290764bb1b100d1974e2bd70f1d", + "ce5b238e19d85ef69d85badfab4e63ae", + }, + { + // 64X32 + "a6c5aeb722615089efbca80b02951ceb", + "538424b24bd0830f21788e7238ca762f", + "80c15b303235f9bc2259027bb92dfdc4", + "e48e1ac15e97191a8fda08d62fff343e", + "12604b37875533665078405ef4582e35", + "0048afa17bd3e1632d68b96048836530", + "07a0cfcb56a5eed50c4bd6c26814336b", + "529d8a070de5bc6531fa3ee8f450c233", + "33c50a11c7d78f72434064f634305e95", + "e0ef7f0559c1a50ec5a8c12011b962f7", + }, + { + // 4X16 + "750491056568eb8fe15387b86bdf06b8", + "3a52dae9f599f08cfb3bd1b910dc0e11", + "af79f71e3e03dbeca44e2e13561f70c7", + "ca7dfd7624afc0c06fb5552f44398535", + "b591af115444bf43140c29c269f68fb2", + "483d942ae36e69e62f31eb215331416f", + "f14b58525e81870bc5d95c7ac71a347f", + "371208bb4027d9badb04095d1590bbc4", + "c7049c21b2924d70c7c12784d6b6b796", + "7d87233f4b5b0f12086045e5d7b2d4c2", + }, + { + // 16X4 + "7c6e325a65e77e732b3adbe237e045e4", + "24478f93ffcec47852e004d0fe948464", + "258d042c67d4ba3ecfa667f0adc9aebf", + "b2cd21d06959f159a1f3c4d9768ee7fb", + "b4e1f38157bf8410e7c3da02f687a343", + "869e703729eb0fc0711c254944ff5d5a", + "9638dd77105a640b146a8201ea7a0801", + "919d932c6af8a1cc7486e8ce996dd487", + "e1c9be493b6714c7ae48f30044c43140", + "bf0fe3889d654b2f6eb98c8fc751f9e4", + }, + { + // 8X32 + "8dfac4319fe0bd40013ffb3102da8c72", + "feb46b6dc4e2ca0a09533bfc51d4dcb0", + "850837ec714c37262216527aaf4cbbe9", + "4603c7800fb08361f163daca876e8bda", + "1ff95e7d2debc27b05806fb25abfd624", + "d81b9a51a062b23ca7823804cb7bec22", + 
"f1d8978158766f46335203608cb807e7", + "f3527096256258c0878d644a9d7d53ca", + "cbde98ac8b009953eb112807ad2ea29e", + "654fb1153415747feae599f538122af5", + }, + { + // 32X8 + "3d4ee16fab374357474f60b845327bc7", + "bc17c5059473a476df4e85f56395ad55", + "3d4ee16fab374357474f60b845327bc7", + "c14b8db34dc2355b84e3735c9ba16c7f", + "a71d25b5d47a92a8b9223c98f18458ee", + "6c1cfe2b1893f4576a80675687cb6426", + "92d11bbef8b85bb48d799bb055de3514", + "bcf81d1db8ae5cc03360467f44f498ec", + "79f8c564163555592e808e145eaf5c60", + "46fff139cef2ef773938bcc8b0e5abb8", + }, + { + // 16X64 + "3b2a053ee8b05a8ac35ad23b0422a151", + "12b0c69595328c465e0b25e0c9e3e9fc", + "f77c544ac8035e01920deae40cee7b07", + "727797ef15ccd8d325476fe8f12006a3", + "f3be77c0fe67eb5d9d515e92bec21eb7", + "f1ece6409e01e9dd98b800d49628247d", + "efd2ec9bfbbd4fd1f6604ea369df1894", + "ec703de918422b9e03197ba0ed60a199", + "739418efb89c07f700895deaa5d0b3e3", + "9943ae1bbeeebfe1d3a92dc39e049d63", + }, + { + // 64X16 + "821b76b1494d4f84d20817840f719a1a", + "69e462c3338a9aaf993c3f7cfbc15649", + "516d8f6eb054d74d150e7b444185b6b9", + "de1b736e9d99129609d6ef3a491507a0", + "fd9b4276e7affe1e0e4ce4f428058994", + "cd82fd361a4767ac29a9f406b480b8f3", + "2792c2f810157a4a6cb13c28529ff779", + "1220442d90c4255ba0969d28b91e93a6", + "c7253e10b45f7f67dfee3256c9b94825", + "879792198071c7e0b50b9b5010d8c18f", + }, +}; + +} // namespace + +// Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors +// to TestIntraPred. 
The test name is 'arch.TestIntraPred_tx_size', e.g., +// C.TestIntraPred.0 +#define INTRA_PRED_TEST(arch, tx_size, dc, dc_left, dc_top, dc_128, v, h, \ + paeth, smooth, smooth_v, smooth_h) \ + TEST(arch, DISABLED_##TestIntraPred_##tx_size) { \ + static const AvxPredFunc aom_intra_pred[] = { \ + dc, dc_left, dc_top, dc_128, v, h, paeth, smooth, smooth_v, smooth_h \ + }; \ + TestIntraPred(tx_size, aom_intra_pred, kSignatures[tx_size]); \ + } + +// ----------------------------------------------------------------------------- +// 4x4, 4x8, 4x16 + +INTRA_PRED_TEST(C, TX_4X4, aom_dc_predictor_4x4_c, aom_dc_left_predictor_4x4_c, + aom_dc_top_predictor_4x4_c, aom_dc_128_predictor_4x4_c, + aom_v_predictor_4x4_c, aom_h_predictor_4x4_c, + aom_paeth_predictor_4x4_c, aom_smooth_predictor_4x4_c, + aom_smooth_v_predictor_4x4_c, aom_smooth_h_predictor_4x4_c) +INTRA_PRED_TEST(C, TX_4X8, aom_dc_predictor_4x8_c, aom_dc_left_predictor_4x8_c, + aom_dc_top_predictor_4x8_c, aom_dc_128_predictor_4x8_c, + aom_v_predictor_4x8_c, aom_h_predictor_4x8_c, + aom_paeth_predictor_4x8_c, aom_smooth_predictor_4x8_c, + aom_smooth_v_predictor_4x8_c, aom_smooth_h_predictor_4x8_c) +INTRA_PRED_TEST(C, TX_4X16, aom_dc_predictor_4x16_c, + aom_dc_left_predictor_4x16_c, aom_dc_top_predictor_4x16_c, + aom_dc_128_predictor_4x16_c, aom_v_predictor_4x16_c, + aom_h_predictor_4x16_c, aom_paeth_predictor_4x16_c, + aom_smooth_predictor_4x16_c, aom_smooth_v_predictor_4x16_c, + aom_smooth_h_predictor_4x16_c) + +#if HAVE_SSE2 +INTRA_PRED_TEST(SSE2, TX_4X4, aom_dc_predictor_4x4_sse2, + aom_dc_left_predictor_4x4_sse2, aom_dc_top_predictor_4x4_sse2, + aom_dc_128_predictor_4x4_sse2, aom_v_predictor_4x4_sse2, + aom_h_predictor_4x4_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_4X8, aom_dc_predictor_4x8_sse2, + aom_dc_left_predictor_4x8_sse2, aom_dc_top_predictor_4x8_sse2, + aom_dc_128_predictor_4x8_sse2, aom_v_predictor_4x8_sse2, + aom_h_predictor_4x8_sse2, nullptr, nullptr, nullptr, nullptr) 
+INTRA_PRED_TEST(SSE2, TX_4X16, aom_dc_predictor_4x16_sse2, + aom_dc_left_predictor_4x16_sse2, aom_dc_top_predictor_4x16_sse2, + aom_dc_128_predictor_4x16_sse2, aom_v_predictor_4x16_sse2, + aom_h_predictor_4x16_sse2, nullptr, nullptr, nullptr, nullptr) +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 +INTRA_PRED_TEST(SSSE3, TX_4X4, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_4x4_ssse3, + aom_smooth_predictor_4x4_ssse3, + aom_smooth_v_predictor_4x4_ssse3, + aom_smooth_h_predictor_4x4_ssse3) +INTRA_PRED_TEST(SSSE3, TX_4X8, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_4x8_ssse3, + aom_smooth_predictor_4x8_ssse3, + aom_smooth_v_predictor_4x8_ssse3, + aom_smooth_h_predictor_4x8_ssse3) +INTRA_PRED_TEST(SSSE3, TX_4X16, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_4x16_ssse3, + aom_smooth_predictor_4x16_ssse3, + aom_smooth_v_predictor_4x16_ssse3, + aom_smooth_h_predictor_4x16_ssse3) +#endif // HAVE_SSSE3 + +#if HAVE_NEON +INTRA_PRED_TEST(NEON, TX_4X4, aom_dc_predictor_4x4_neon, + aom_dc_left_predictor_4x4_neon, aom_dc_top_predictor_4x4_neon, + aom_dc_128_predictor_4x4_neon, aom_v_predictor_4x4_neon, + aom_h_predictor_4x4_neon, aom_paeth_predictor_4x4_neon, + aom_smooth_predictor_4x4_neon, aom_smooth_v_predictor_4x4_neon, + aom_smooth_h_predictor_4x4_neon) +INTRA_PRED_TEST(NEON, TX_4X8, aom_dc_predictor_4x8_neon, + aom_dc_left_predictor_4x8_neon, aom_dc_top_predictor_4x8_neon, + aom_dc_128_predictor_4x8_neon, aom_v_predictor_4x8_neon, + aom_h_predictor_4x8_neon, aom_paeth_predictor_4x8_neon, + aom_smooth_predictor_4x8_neon, aom_smooth_v_predictor_4x8_neon, + aom_smooth_h_predictor_4x8_neon) +INTRA_PRED_TEST(NEON, TX_4X16, aom_dc_predictor_4x16_neon, + aom_dc_left_predictor_4x16_neon, aom_dc_top_predictor_4x16_neon, + aom_dc_128_predictor_4x16_neon, aom_v_predictor_4x16_neon, + aom_h_predictor_4x16_neon, aom_paeth_predictor_4x16_neon, + aom_smooth_predictor_4x16_neon, + 
aom_smooth_v_predictor_4x16_neon, + aom_smooth_h_predictor_4x16_neon) +#endif // HAVE_NEON + +// ----------------------------------------------------------------------------- +// 8x8, 8x4, 8x16, 8x32 + +INTRA_PRED_TEST(C, TX_8X8, aom_dc_predictor_8x8_c, aom_dc_left_predictor_8x8_c, + aom_dc_top_predictor_8x8_c, aom_dc_128_predictor_8x8_c, + aom_v_predictor_8x8_c, aom_h_predictor_8x8_c, + aom_paeth_predictor_8x8_c, aom_smooth_predictor_8x8_c, + aom_smooth_v_predictor_8x8_c, aom_smooth_h_predictor_8x8_c) + +INTRA_PRED_TEST(C, TX_8X4, aom_dc_predictor_8x4_c, aom_dc_left_predictor_8x4_c, + aom_dc_top_predictor_8x4_c, aom_dc_128_predictor_8x4_c, + aom_v_predictor_8x4_c, aom_h_predictor_8x4_c, + aom_paeth_predictor_8x4_c, aom_smooth_predictor_8x4_c, + aom_smooth_v_predictor_8x4_c, aom_smooth_h_predictor_8x4_c) +INTRA_PRED_TEST(C, TX_8X16, aom_dc_predictor_8x16_c, + aom_dc_left_predictor_8x16_c, aom_dc_top_predictor_8x16_c, + aom_dc_128_predictor_8x16_c, aom_v_predictor_8x16_c, + aom_h_predictor_8x16_c, aom_paeth_predictor_8x16_c, + aom_smooth_predictor_8x16_c, aom_smooth_v_predictor_8x16_c, + aom_smooth_h_predictor_8x16_c) +INTRA_PRED_TEST(C, TX_8X32, aom_dc_predictor_8x32_c, + aom_dc_left_predictor_8x32_c, aom_dc_top_predictor_8x32_c, + aom_dc_128_predictor_8x32_c, aom_v_predictor_8x32_c, + aom_h_predictor_8x32_c, aom_paeth_predictor_8x32_c, + aom_smooth_predictor_8x32_c, aom_smooth_v_predictor_8x32_c, + aom_smooth_h_predictor_8x32_c) + +#if HAVE_SSE2 +INTRA_PRED_TEST(SSE2, TX_8X8, aom_dc_predictor_8x8_sse2, + aom_dc_left_predictor_8x8_sse2, aom_dc_top_predictor_8x8_sse2, + aom_dc_128_predictor_8x8_sse2, aom_v_predictor_8x8_sse2, + aom_h_predictor_8x8_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_8X4, aom_dc_predictor_8x4_sse2, + aom_dc_left_predictor_8x4_sse2, aom_dc_top_predictor_8x4_sse2, + aom_dc_128_predictor_8x4_sse2, aom_v_predictor_8x4_sse2, + aom_h_predictor_8x4_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_8X16, 
aom_dc_predictor_8x16_sse2, + aom_dc_left_predictor_8x16_sse2, aom_dc_top_predictor_8x16_sse2, + aom_dc_128_predictor_8x16_sse2, aom_v_predictor_8x16_sse2, + aom_h_predictor_8x16_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_8X32, aom_dc_predictor_8x32_sse2, + aom_dc_left_predictor_8x32_sse2, aom_dc_top_predictor_8x32_sse2, + aom_dc_128_predictor_8x32_sse2, aom_v_predictor_8x32_sse2, + aom_h_predictor_8x32_sse2, nullptr, nullptr, nullptr, nullptr) +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 +INTRA_PRED_TEST(SSSE3, TX_8X8, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_8x8_ssse3, + aom_smooth_predictor_8x8_ssse3, + aom_smooth_v_predictor_8x8_ssse3, + aom_smooth_h_predictor_8x8_ssse3) +INTRA_PRED_TEST(SSSE3, TX_8X4, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_8x4_ssse3, + aom_smooth_predictor_8x4_ssse3, + aom_smooth_v_predictor_8x4_ssse3, + aom_smooth_h_predictor_8x4_ssse3) +INTRA_PRED_TEST(SSSE3, TX_8X16, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_8x16_ssse3, + aom_smooth_predictor_8x16_ssse3, + aom_smooth_v_predictor_8x16_ssse3, + aom_smooth_h_predictor_8x16_ssse3) +INTRA_PRED_TEST(SSSE3, TX_8X32, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_8x32_ssse3, + aom_smooth_predictor_8x32_ssse3, + aom_smooth_v_predictor_8x32_ssse3, + aom_smooth_h_predictor_8x32_ssse3) +#endif // HAVE_SSSE3 + +#if HAVE_NEON +INTRA_PRED_TEST(NEON, TX_8X8, aom_dc_predictor_8x8_neon, + aom_dc_left_predictor_8x8_neon, aom_dc_top_predictor_8x8_neon, + aom_dc_128_predictor_8x8_neon, aom_v_predictor_8x8_neon, + aom_h_predictor_8x8_neon, aom_paeth_predictor_8x8_neon, + aom_smooth_predictor_8x8_neon, aom_smooth_v_predictor_8x8_neon, + aom_smooth_h_predictor_8x8_neon) +INTRA_PRED_TEST(NEON, TX_8X4, aom_dc_predictor_8x4_neon, + aom_dc_left_predictor_8x4_neon, aom_dc_top_predictor_8x4_neon, + aom_dc_128_predictor_8x4_neon, aom_v_predictor_8x4_neon, + 
aom_h_predictor_8x4_neon, aom_paeth_predictor_8x4_neon, + aom_smooth_predictor_8x4_neon, aom_smooth_v_predictor_8x4_neon, + aom_smooth_h_predictor_8x4_neon) +INTRA_PRED_TEST(NEON, TX_8X16, aom_dc_predictor_8x16_neon, + aom_dc_left_predictor_8x16_neon, aom_dc_top_predictor_8x16_neon, + aom_dc_128_predictor_8x16_neon, aom_v_predictor_8x16_neon, + aom_h_predictor_8x16_neon, aom_paeth_predictor_8x16_neon, + aom_smooth_predictor_8x16_neon, + aom_smooth_v_predictor_8x16_neon, + aom_smooth_h_predictor_8x16_neon) +INTRA_PRED_TEST(NEON, TX_8X32, aom_dc_predictor_8x32_neon, + aom_dc_left_predictor_8x32_neon, aom_dc_top_predictor_8x32_neon, + aom_dc_128_predictor_8x32_neon, aom_v_predictor_8x32_neon, + aom_h_predictor_8x32_neon, aom_paeth_predictor_8x32_neon, + aom_smooth_predictor_8x32_neon, + aom_smooth_v_predictor_8x32_neon, + aom_smooth_h_predictor_8x32_neon) +#endif // HAVE_NEON + +// ----------------------------------------------------------------------------- +// 16x16, 16x8, 16x32, 16x4, 16x64 + +INTRA_PRED_TEST(C, TX_16X16, aom_dc_predictor_16x16_c, + aom_dc_left_predictor_16x16_c, aom_dc_top_predictor_16x16_c, + aom_dc_128_predictor_16x16_c, aom_v_predictor_16x16_c, + aom_h_predictor_16x16_c, aom_paeth_predictor_16x16_c, + aom_smooth_predictor_16x16_c, aom_smooth_v_predictor_16x16_c, + aom_smooth_h_predictor_16x16_c) +INTRA_PRED_TEST(C, TX_16X8, aom_dc_predictor_16x8_c, + aom_dc_left_predictor_16x8_c, aom_dc_top_predictor_16x8_c, + aom_dc_128_predictor_16x8_c, aom_v_predictor_16x8_c, + aom_h_predictor_16x8_c, aom_paeth_predictor_16x8_c, + aom_smooth_predictor_16x8_c, aom_smooth_v_predictor_16x8_c, + aom_smooth_h_predictor_16x8_c) +INTRA_PRED_TEST(C, TX_16X32, aom_dc_predictor_16x32_c, + aom_dc_left_predictor_16x32_c, aom_dc_top_predictor_16x32_c, + aom_dc_128_predictor_16x32_c, aom_v_predictor_16x32_c, + aom_h_predictor_16x32_c, aom_paeth_predictor_16x32_c, + aom_smooth_predictor_16x32_c, aom_smooth_v_predictor_16x32_c, + aom_smooth_h_predictor_16x32_c) 
+INTRA_PRED_TEST(C, TX_16X4, aom_dc_predictor_16x4_c, + aom_dc_left_predictor_16x4_c, aom_dc_top_predictor_16x4_c, + aom_dc_128_predictor_16x4_c, aom_v_predictor_16x4_c, + aom_h_predictor_16x4_c, aom_paeth_predictor_16x4_c, + aom_smooth_predictor_16x4_c, aom_smooth_v_predictor_16x4_c, + aom_smooth_h_predictor_16x4_c) +INTRA_PRED_TEST(C, TX_16X64, aom_dc_predictor_16x64_c, + aom_dc_left_predictor_16x64_c, aom_dc_top_predictor_16x64_c, + aom_dc_128_predictor_16x64_c, aom_v_predictor_16x64_c, + aom_h_predictor_16x64_c, aom_paeth_predictor_16x64_c, + aom_smooth_predictor_16x64_c, aom_smooth_v_predictor_16x64_c, + aom_smooth_h_predictor_16x64_c) + +#if HAVE_SSE2 +INTRA_PRED_TEST(SSE2, TX_16X16, aom_dc_predictor_16x16_sse2, + aom_dc_left_predictor_16x16_sse2, + aom_dc_top_predictor_16x16_sse2, + aom_dc_128_predictor_16x16_sse2, aom_v_predictor_16x16_sse2, + aom_h_predictor_16x16_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_16X8, aom_dc_predictor_16x8_sse2, + aom_dc_left_predictor_16x8_sse2, aom_dc_top_predictor_16x8_sse2, + aom_dc_128_predictor_16x8_sse2, aom_v_predictor_16x8_sse2, + aom_h_predictor_16x8_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_16X32, aom_dc_predictor_16x32_sse2, + aom_dc_left_predictor_16x32_sse2, + aom_dc_top_predictor_16x32_sse2, + aom_dc_128_predictor_16x32_sse2, aom_v_predictor_16x32_sse2, + aom_h_predictor_16x32_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_16X64, aom_dc_predictor_16x64_sse2, + aom_dc_left_predictor_16x64_sse2, + aom_dc_top_predictor_16x64_sse2, + aom_dc_128_predictor_16x64_sse2, aom_v_predictor_16x64_sse2, + aom_h_predictor_16x64_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_16X4, aom_dc_predictor_16x4_sse2, + aom_dc_left_predictor_16x4_sse2, aom_dc_top_predictor_16x4_sse2, + aom_dc_128_predictor_16x4_sse2, aom_v_predictor_16x4_sse2, + aom_h_predictor_16x4_sse2, nullptr, nullptr, nullptr, nullptr) +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 
+INTRA_PRED_TEST(SSSE3, TX_16X16, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_16x16_ssse3, + aom_smooth_predictor_16x16_ssse3, + aom_smooth_v_predictor_16x16_ssse3, + aom_smooth_h_predictor_16x16_ssse3) +INTRA_PRED_TEST(SSSE3, TX_16X8, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_16x8_ssse3, + aom_smooth_predictor_16x8_ssse3, + aom_smooth_v_predictor_16x8_ssse3, + aom_smooth_h_predictor_16x8_ssse3) +INTRA_PRED_TEST(SSSE3, TX_16X32, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_16x32_ssse3, + aom_smooth_predictor_16x32_ssse3, + aom_smooth_v_predictor_16x32_ssse3, + aom_smooth_h_predictor_16x32_ssse3) +INTRA_PRED_TEST(SSSE3, TX_16X64, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_16x64_ssse3, + aom_smooth_predictor_16x64_ssse3, + aom_smooth_v_predictor_16x64_ssse3, + aom_smooth_h_predictor_16x64_ssse3) +INTRA_PRED_TEST(SSSE3, TX_16X4, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_16x4_ssse3, + aom_smooth_predictor_16x4_ssse3, + aom_smooth_v_predictor_16x4_ssse3, + aom_smooth_h_predictor_16x4_ssse3) +#endif // HAVE_SSSE3 + +#if HAVE_AVX2 +INTRA_PRED_TEST(AVX2, TX_16X16, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_16x16_avx2, nullptr, nullptr, + nullptr) +INTRA_PRED_TEST(AVX2, TX_16X8, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_16x8_avx2, nullptr, nullptr, + nullptr) +INTRA_PRED_TEST(AVX2, TX_16X32, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_16x32_avx2, nullptr, nullptr, + nullptr) +INTRA_PRED_TEST(AVX2, TX_16X64, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_16x64_avx2, nullptr, nullptr, + nullptr) +#endif // HAVE_AVX2 + +#if HAVE_NEON +INTRA_PRED_TEST(NEON, TX_16X16, aom_dc_predictor_16x16_neon, + aom_dc_left_predictor_16x16_neon, + aom_dc_top_predictor_16x16_neon, + 
aom_dc_128_predictor_16x16_neon, aom_v_predictor_16x16_neon, + aom_h_predictor_16x16_neon, aom_paeth_predictor_16x16_neon, + aom_smooth_predictor_16x16_neon, + aom_smooth_v_predictor_16x16_neon, + aom_smooth_h_predictor_16x16_neon) +INTRA_PRED_TEST(NEON, TX_16X8, aom_dc_predictor_16x8_neon, + aom_dc_left_predictor_16x8_neon, aom_dc_top_predictor_16x8_neon, + aom_dc_128_predictor_16x8_neon, aom_v_predictor_16x8_neon, + aom_h_predictor_16x8_neon, aom_paeth_predictor_16x8_neon, + aom_smooth_predictor_16x8_neon, + aom_smooth_v_predictor_16x8_neon, + aom_smooth_h_predictor_16x8_neon) +INTRA_PRED_TEST(NEON, TX_16X32, aom_dc_predictor_16x32_neon, + aom_dc_left_predictor_16x32_neon, + aom_dc_top_predictor_16x32_neon, + aom_dc_128_predictor_16x32_neon, aom_v_predictor_16x32_neon, + aom_h_predictor_16x32_neon, aom_paeth_predictor_16x32_neon, + aom_smooth_predictor_16x32_neon, + aom_smooth_v_predictor_16x32_neon, + aom_smooth_h_predictor_16x32_neon) +INTRA_PRED_TEST(NEON, TX_16X4, aom_dc_predictor_16x4_neon, + aom_dc_left_predictor_16x4_neon, aom_dc_top_predictor_16x4_neon, + aom_dc_128_predictor_16x4_neon, aom_v_predictor_16x4_neon, + aom_h_predictor_16x4_neon, aom_paeth_predictor_16x4_neon, + aom_smooth_predictor_16x4_neon, + aom_smooth_v_predictor_16x4_neon, + aom_smooth_h_predictor_16x4_neon) +INTRA_PRED_TEST(NEON, TX_16X64, aom_dc_predictor_16x64_neon, + aom_dc_left_predictor_16x64_neon, + aom_dc_top_predictor_16x64_neon, + aom_dc_128_predictor_16x64_neon, aom_v_predictor_16x64_neon, + aom_h_predictor_16x64_neon, aom_paeth_predictor_16x64_neon, + aom_smooth_predictor_16x64_neon, + aom_smooth_v_predictor_16x64_neon, + aom_smooth_h_predictor_16x64_neon) +#endif // HAVE_NEON + +// ----------------------------------------------------------------------------- +// 32x32, 32x16, 32x64, 32x8 + +INTRA_PRED_TEST(C, TX_32X32, aom_dc_predictor_32x32_c, + aom_dc_left_predictor_32x32_c, aom_dc_top_predictor_32x32_c, + aom_dc_128_predictor_32x32_c, aom_v_predictor_32x32_c, + 
aom_h_predictor_32x32_c, aom_paeth_predictor_32x32_c, + aom_smooth_predictor_32x32_c, aom_smooth_v_predictor_32x32_c, + aom_smooth_h_predictor_32x32_c) +INTRA_PRED_TEST(C, TX_32X16, aom_dc_predictor_32x16_c, + aom_dc_left_predictor_32x16_c, aom_dc_top_predictor_32x16_c, + aom_dc_128_predictor_32x16_c, aom_v_predictor_32x16_c, + aom_h_predictor_32x16_c, aom_paeth_predictor_32x16_c, + aom_smooth_predictor_32x16_c, aom_smooth_v_predictor_32x16_c, + aom_smooth_h_predictor_32x16_c) +INTRA_PRED_TEST(C, TX_32X64, aom_dc_predictor_32x64_c, + aom_dc_left_predictor_32x64_c, aom_dc_top_predictor_32x64_c, + aom_dc_128_predictor_32x64_c, aom_v_predictor_32x64_c, + aom_h_predictor_32x64_c, aom_paeth_predictor_32x64_c, + aom_smooth_predictor_32x64_c, aom_smooth_v_predictor_32x64_c, + aom_smooth_h_predictor_32x64_c) +INTRA_PRED_TEST(C, TX_32X8, aom_dc_predictor_32x8_c, + aom_dc_left_predictor_32x8_c, aom_dc_top_predictor_32x8_c, + aom_dc_128_predictor_32x8_c, aom_v_predictor_32x8_c, + aom_h_predictor_32x8_c, aom_paeth_predictor_32x8_c, + aom_smooth_predictor_32x8_c, aom_smooth_v_predictor_32x8_c, + aom_smooth_h_predictor_32x8_c) + +#if HAVE_SSE2 +INTRA_PRED_TEST(SSE2, TX_32X32, aom_dc_predictor_32x32_sse2, + aom_dc_left_predictor_32x32_sse2, + aom_dc_top_predictor_32x32_sse2, + aom_dc_128_predictor_32x32_sse2, aom_v_predictor_32x32_sse2, + aom_h_predictor_32x32_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_32X16, aom_dc_predictor_32x16_sse2, + aom_dc_left_predictor_32x16_sse2, + aom_dc_top_predictor_32x16_sse2, + aom_dc_128_predictor_32x16_sse2, aom_v_predictor_32x16_sse2, + aom_h_predictor_32x16_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_32X64, aom_dc_predictor_32x64_sse2, + aom_dc_left_predictor_32x64_sse2, + aom_dc_top_predictor_32x64_sse2, + aom_dc_128_predictor_32x64_sse2, aom_v_predictor_32x64_sse2, + aom_h_predictor_32x64_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_32X8, aom_dc_predictor_32x8_sse2, + 
aom_dc_left_predictor_32x8_sse2, aom_dc_top_predictor_32x8_sse2, + aom_dc_128_predictor_32x8_sse2, aom_v_predictor_32x8_sse2, + aom_h_predictor_32x8_sse2, nullptr, nullptr, nullptr, nullptr) +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 +INTRA_PRED_TEST(SSSE3, TX_32X32, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_32x32_ssse3, + aom_smooth_predictor_32x32_ssse3, + aom_smooth_v_predictor_32x32_ssse3, + aom_smooth_h_predictor_32x32_ssse3) +INTRA_PRED_TEST(SSSE3, TX_32X16, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_32x16_ssse3, + aom_smooth_predictor_32x16_ssse3, + aom_smooth_v_predictor_32x16_ssse3, + aom_smooth_h_predictor_32x16_ssse3) +INTRA_PRED_TEST(SSSE3, TX_32X64, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_32x64_ssse3, + aom_smooth_predictor_32x64_ssse3, + aom_smooth_v_predictor_32x64_ssse3, + aom_smooth_h_predictor_32x64_ssse3) +INTRA_PRED_TEST(SSSE3, TX_32X8, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_32x8_ssse3, + aom_smooth_predictor_32x8_ssse3, + aom_smooth_v_predictor_32x8_ssse3, + aom_smooth_h_predictor_32x8_ssse3) +#endif // HAVE_SSSE3 + +#if HAVE_AVX2 +INTRA_PRED_TEST(AVX2, TX_32X32, aom_dc_predictor_32x32_avx2, + aom_dc_left_predictor_32x32_avx2, + aom_dc_top_predictor_32x32_avx2, + aom_dc_128_predictor_32x32_avx2, aom_v_predictor_32x32_avx2, + aom_h_predictor_32x32_avx2, aom_paeth_predictor_32x32_avx2, + nullptr, nullptr, nullptr) +INTRA_PRED_TEST(AVX2, TX_32X16, aom_dc_predictor_32x16_avx2, + aom_dc_left_predictor_32x16_avx2, + aom_dc_top_predictor_32x16_avx2, + aom_dc_128_predictor_32x16_avx2, aom_v_predictor_32x16_avx2, + nullptr, aom_paeth_predictor_32x16_avx2, nullptr, nullptr, + nullptr) +INTRA_PRED_TEST(AVX2, TX_32X64, aom_dc_predictor_32x64_avx2, + aom_dc_left_predictor_32x64_avx2, + aom_dc_top_predictor_32x64_avx2, + aom_dc_128_predictor_32x64_avx2, aom_v_predictor_32x64_avx2, + nullptr, aom_paeth_predictor_32x64_avx2, 
nullptr, nullptr, + nullptr) +#endif // HAVE_AVX2 + +#if HAVE_NEON +INTRA_PRED_TEST(NEON, TX_32X32, aom_dc_predictor_32x32_neon, + aom_dc_left_predictor_32x32_neon, + aom_dc_top_predictor_32x32_neon, + aom_dc_128_predictor_32x32_neon, aom_v_predictor_32x32_neon, + aom_h_predictor_32x32_neon, aom_paeth_predictor_32x32_neon, + aom_smooth_predictor_32x32_neon, + aom_smooth_v_predictor_32x32_neon, + aom_smooth_h_predictor_32x32_neon) +INTRA_PRED_TEST(NEON, TX_32X16, aom_dc_predictor_32x16_neon, + aom_dc_left_predictor_32x16_neon, + aom_dc_top_predictor_32x16_neon, + aom_dc_128_predictor_32x16_neon, aom_v_predictor_32x16_neon, + aom_h_predictor_32x16_neon, aom_paeth_predictor_32x16_neon, + aom_smooth_predictor_32x16_neon, + aom_smooth_v_predictor_32x16_neon, + aom_smooth_h_predictor_32x16_neon) +INTRA_PRED_TEST(NEON, TX_32X64, aom_dc_predictor_32x64_neon, + aom_dc_left_predictor_32x64_neon, + aom_dc_top_predictor_32x64_neon, + aom_dc_128_predictor_32x64_neon, aom_v_predictor_32x64_neon, + aom_h_predictor_32x64_neon, aom_paeth_predictor_32x64_neon, + aom_smooth_predictor_32x64_neon, + aom_smooth_v_predictor_32x64_neon, + aom_smooth_h_predictor_32x64_neon) +INTRA_PRED_TEST(NEON, TX_32X8, aom_dc_predictor_32x8_neon, + aom_dc_left_predictor_32x8_neon, aom_dc_top_predictor_32x8_neon, + aom_dc_128_predictor_32x8_neon, aom_v_predictor_32x8_neon, + aom_h_predictor_32x8_neon, aom_paeth_predictor_32x8_neon, + aom_smooth_predictor_32x8_neon, + aom_smooth_v_predictor_32x8_neon, + aom_smooth_h_predictor_32x8_neon) +#endif // HAVE_NEON + +// ----------------------------------------------------------------------------- +// 64x64, 64x32, 64x16 + +INTRA_PRED_TEST(C, TX_64X64, aom_dc_predictor_64x64_c, + aom_dc_left_predictor_64x64_c, aom_dc_top_predictor_64x64_c, + aom_dc_128_predictor_64x64_c, aom_v_predictor_64x64_c, + aom_h_predictor_64x64_c, aom_paeth_predictor_64x64_c, + aom_smooth_predictor_64x64_c, aom_smooth_v_predictor_64x64_c, + aom_smooth_h_predictor_64x64_c) 
+INTRA_PRED_TEST(C, TX_64X32, aom_dc_predictor_64x32_c, + aom_dc_left_predictor_64x32_c, aom_dc_top_predictor_64x32_c, + aom_dc_128_predictor_64x32_c, aom_v_predictor_64x32_c, + aom_h_predictor_64x32_c, aom_paeth_predictor_64x32_c, + aom_smooth_predictor_64x32_c, aom_smooth_v_predictor_64x32_c, + aom_smooth_h_predictor_64x32_c) +INTRA_PRED_TEST(C, TX_64X16, aom_dc_predictor_64x16_c, + aom_dc_left_predictor_64x16_c, aom_dc_top_predictor_64x16_c, + aom_dc_128_predictor_64x16_c, aom_v_predictor_64x16_c, + aom_h_predictor_64x16_c, aom_paeth_predictor_64x16_c, + aom_smooth_predictor_64x16_c, aom_smooth_v_predictor_64x16_c, + aom_smooth_h_predictor_64x16_c) + +#if HAVE_SSE2 +INTRA_PRED_TEST(SSE2, TX_64X64, aom_dc_predictor_64x64_sse2, + aom_dc_left_predictor_64x64_sse2, + aom_dc_top_predictor_64x64_sse2, + aom_dc_128_predictor_64x64_sse2, aom_v_predictor_64x64_sse2, + aom_h_predictor_64x64_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_64X32, aom_dc_predictor_64x32_sse2, + aom_dc_left_predictor_64x32_sse2, + aom_dc_top_predictor_64x32_sse2, + aom_dc_128_predictor_64x32_sse2, aom_v_predictor_64x32_sse2, + aom_h_predictor_64x32_sse2, nullptr, nullptr, nullptr, nullptr) +INTRA_PRED_TEST(SSE2, TX_64X16, aom_dc_predictor_64x16_sse2, + aom_dc_left_predictor_64x16_sse2, + aom_dc_top_predictor_64x16_sse2, + aom_dc_128_predictor_64x16_sse2, aom_v_predictor_64x16_sse2, + aom_h_predictor_64x16_sse2, nullptr, nullptr, nullptr, nullptr) +#endif + +#if HAVE_SSSE3 +INTRA_PRED_TEST(SSSE3, TX_64X64, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_64x64_ssse3, + aom_smooth_predictor_64x64_ssse3, + aom_smooth_v_predictor_64x64_ssse3, + aom_smooth_h_predictor_64x64_ssse3) +INTRA_PRED_TEST(SSSE3, TX_64X32, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_64x32_ssse3, + aom_smooth_predictor_64x32_ssse3, + aom_smooth_v_predictor_64x32_ssse3, + aom_smooth_h_predictor_64x32_ssse3) +INTRA_PRED_TEST(SSSE3, TX_64X16, 
nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, aom_paeth_predictor_64x16_ssse3, + aom_smooth_predictor_64x16_ssse3, + aom_smooth_v_predictor_64x16_ssse3, + aom_smooth_h_predictor_64x16_ssse3) +#endif + +#if HAVE_AVX2 +INTRA_PRED_TEST(AVX2, TX_64X64, aom_dc_predictor_64x64_avx2, + aom_dc_left_predictor_64x64_avx2, + aom_dc_top_predictor_64x64_avx2, + aom_dc_128_predictor_64x64_avx2, aom_v_predictor_64x64_avx2, + nullptr, aom_paeth_predictor_64x64_avx2, nullptr, nullptr, + nullptr) +INTRA_PRED_TEST(AVX2, TX_64X32, aom_dc_predictor_64x32_avx2, + aom_dc_left_predictor_64x32_avx2, + aom_dc_top_predictor_64x32_avx2, + aom_dc_128_predictor_64x32_avx2, aom_v_predictor_64x32_avx2, + nullptr, aom_paeth_predictor_64x32_avx2, nullptr, nullptr, + nullptr) +INTRA_PRED_TEST(AVX2, TX_64X16, aom_dc_predictor_64x16_avx2, + aom_dc_left_predictor_64x16_avx2, + aom_dc_top_predictor_64x16_avx2, + aom_dc_128_predictor_64x16_avx2, aom_v_predictor_64x16_avx2, + nullptr, aom_paeth_predictor_64x16_avx2, nullptr, nullptr, + nullptr) +#endif + +#if HAVE_NEON +INTRA_PRED_TEST(NEON, TX_64X64, aom_dc_predictor_64x64_neon, + aom_dc_left_predictor_64x64_neon, + aom_dc_top_predictor_64x64_neon, + aom_dc_128_predictor_64x64_neon, aom_v_predictor_64x64_neon, + aom_h_predictor_64x64_neon, aom_paeth_predictor_64x64_neon, + aom_smooth_predictor_64x64_neon, + aom_smooth_v_predictor_64x64_neon, + aom_smooth_h_predictor_64x64_neon) +INTRA_PRED_TEST(NEON, TX_64X32, aom_dc_predictor_64x32_neon, + aom_dc_left_predictor_64x32_neon, + aom_dc_top_predictor_64x32_neon, + aom_dc_128_predictor_64x32_neon, aom_v_predictor_64x32_neon, + aom_h_predictor_64x32_neon, aom_paeth_predictor_64x32_neon, + aom_smooth_predictor_64x32_neon, + aom_smooth_v_predictor_64x32_neon, + aom_smooth_h_predictor_64x32_neon) +INTRA_PRED_TEST(NEON, TX_64X16, aom_dc_predictor_64x16_neon, + aom_dc_left_predictor_64x16_neon, + aom_dc_top_predictor_64x16_neon, + aom_dc_128_predictor_64x16_neon, aom_v_predictor_64x16_neon, + 
aom_h_predictor_64x16_neon, aom_paeth_predictor_64x16_neon, + aom_smooth_predictor_64x16_neon, + aom_smooth_v_predictor_64x16_neon, + aom_smooth_h_predictor_64x16_neon) +#endif // HAVE_NEON + +#if CONFIG_AV1_HIGHBITDEPTH +// ----------------------------------------------------------------------------- +// High Bitdepth +namespace { + +typedef void (*AvxHighbdPredFunc)(uint16_t *dst, ptrdiff_t y_stride, + const uint16_t *above, const uint16_t *left, + int bd); + +typedef IntraPredTestMem<uint16_t> Av1HighbdIntraPredTestMem; + +void TestHighbdIntraPred(TX_SIZE tx_size, AvxHighbdPredFunc const *pred_funcs, + const char *const signatures[]) { + const int block_width = tx_size_wide[tx_size]; + const int block_height = tx_size_high[tx_size]; + const int num_pixels_per_test = + block_width * block_height * kNumAv1IntraFuncs; + const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test); + Av1HighbdIntraPredTestMem intra_pred_test_mem; + const int bd = 12; + intra_pred_test_mem.Init(block_width, block_height, bd); + + for (int k = 0; k < kNumAv1IntraFuncs; ++k) { + if (pred_funcs[k] == nullptr) continue; + memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src, + sizeof(intra_pred_test_mem.src)); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int num_tests = 0; num_tests < kNumTests; ++num_tests) { + pred_funcs[k](intra_pred_test_mem.src, intra_pred_test_mem.stride, + intra_pred_test_mem.above, intra_pred_test_mem.left, bd); + } + aom_usec_timer_mark(&timer); + const int elapsed_time = + static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000); + CheckMd5Signature( + tx_size, true, signatures, intra_pred_test_mem.src, + intra_pred_test_mem.num_pixels * sizeof(*intra_pred_test_mem.src), + elapsed_time, k); + } +} + +static const char *const kHighbdSignatures[TX_SIZES_ALL][kNumAv1IntraFuncs] = { + { + // 4X4 + "11f74af6c5737df472f3275cbde062fa", + "51bea056b6447c93f6eb8f6b7e8f6f71", + "27e97f946766331795886f4de04c5594", + 
"53ab15974b049111fb596c5168ec7e3f", + "f0b640bb176fbe4584cf3d32a9b0320a", + "729783ca909e03afd4b47111c80d967b", + "6e30009c45474a22032678b1bd579c8f", + "e57cba016d808aa8a35619df2a65f049", + "55a6c37f39afcbbf5abca4a985b96459", + "a623d45b37dafec1f8a75c4c5218913d", + }, + { + // 8X8 + "03da8829fe94663047fd108c5fcaa71d", + "ecdb37b8120a2d3a4c706b016bd1bfd7", + "1d4543ed8d2b9368cb96898095fe8a75", + "f791c9a67b913cbd82d9da8ecede30e2", + "065c70646f4dbaff913282f55a45a441", + "51f87123616662ef7c35691497dfd0ba", + "85c01ba03df68f9ece7bd3fa0f8980e6", + "ad19b7dac092f56df6d054e1f67f21e7", + "0edc415b5dd7299f7a34fb9f71d31d78", + "2bc8ec19e9f4b77a64b8a0a1f6aec7e7", + }, + { + // 16X16 + "e33cb3f56a878e2fddb1b2fc51cdd275", + "c7bff6f04b6052c8ab335d726dbbd52d", + "d0b0b47b654a9bcc5c6008110a44589b", + "78f5da7b10b2b9ab39f114a33b6254e9", + "c78e31d23831abb40d6271a318fdd6f3", + "90d1347f4ec9198a0320daecb6ff90b8", + "e63ded54ab3d0e8728b6f24d4f01e53f", + "35ce21fbe0ea114c089fc3489a78155d", + "f277f6ef8e4d717f1f0dfe2706ac197d", + "e8014d3f41256976c02e0f1e622ba2b9", + }, + { + // 32X32 + "a3e8056ba7e36628cce4917cd956fedd", + "cc7d3024fe8748b512407edee045377e", + "2aab0a0f330a1d3e19b8ecb8f06387a3", + "a547bc3fb7b06910bf3973122a426661", + "26f712514da95042f93d6e8dc8e431dc", + "bb08c6e16177081daa3d936538dbc2e3", + "84bf83f94a51b33654ca940c6f8bc057", + "7168b03fc31bf29596a344d6a35d007c", + "b073a70d3672f1282236994f5d12e94b", + "c51607aebad5dcb3c1e3b58ef9e5b84e", + }, + { + // 64X64 + "a6baa0d4bfb2269a94c7a38f86a4bccf", + "3f1ef5f473a49eba743f17a3324adf9d", + "12ac11889ae5f55b7781454efd706a6a", + "d9a906c0e692b22e1b4414e71a704b7e", + "47d4cadd56f70c11ff8f3e5d8df81161", + "de997744cf24c16c5ac2a36b02b351cc", + "23781211ae178ddeb6c4bb97a6bd7d83", + "a79d2e28340ca34b9e37daabbf030f63", + "0372bd3ddfc258750a6ac106b70587f4", + "228ef625d9460cbf6fa253a16a730976", + }, + { + // 4X8 + "22d519b796d59644043466320e4ccd14", + "09513a738c49b3f9542d27f34abbe1d5", + "807ae5e8813443ff01e71be6efacfb69", + 
"cbfa18d0293430b6e9708b0be1fd2394", + "346c354c34ec7fa780b576db355dab88", + "f97dae85c35359632380b09ca98d611e", + "698ae351d8896d89ed9e4e67b6e53eda", + "dcc197034a9c45a3d8238bf085835f4e", + "7a35e2c42ffdc2efc2d6d1d75a100fc7", + "41ab6cebd4516c87a91b2a593e2c2506", + }, + { + // 8X4 + "d58cd4c4bf3b7bbaa5db5e1a5622ec78", + "6e572c35aa782d00cafcb99e9ea047ea", + "e8c22a3702b416dc9ab974505afbed09", + "aaa4e4762a795aad7ad74de0c662c4e4", + "a19f9101967383c3dcbd516dc317a291", + "9ab8cb91f1a595b9ebe3fe8de58031aa", + "2cf9021d5f1169268699807ee118b65f", + "ee9605fcbd6fb871f1c5cd81a6989327", + "b4871af8316089e3e23522175df7e93f", + "d33301e1c2cb173be46792a22d19881a", + }, + { + // 8X16 + "4562de1d0336610880fdd5685498a9ec", + "16310fa7076394f16fc85c4b149d89c9", + "0e94af88e1dc573b6f0f499cddd1f530", + "dfd245ee20d091c67809160340365aa9", + "d3562504327f70c096c5be23fd8a3747", + "601b853558502acbb5135eadd2da117a", + "3c624345a723a1b2b1bea05a6a08bc99", + "2a9c781de609e0184cc7ab442050f4e5", + "0ddc5035c22252747126b61fc238c74d", + "e43f5d83bab759af69c7b6773fc8f9b2", + }, + { + // 16X8 + "a57d6b5a9bfd30c29591d8717ace9c51", + "f5907ba97ee6c53e339e953fc8d845ee", + "ea3aa727913ce45af06f89dd1808db5f", + "408af4f23e48d14b48ee35ae094fcd18", + "85c41cbcb5d744f7961e8950026fbffe", + "8a4e588a837638887ba671f8d4910485", + "b792d8826b67a21757ea7097cff9e05b", + "f94ce7101bb87fd3bb9312112527dbf4", + "688c6660a6dc6fa61fa1aa38e708c209", + "0cdf641b4f81d69509c92ae0b93ef5ff", + }, + { + // 16X32 + "aee4b3b0e3cc02d48e2c40d77f807927", + "8baef2b2e789f79c8df9d90ad10f34a4", + "038c38ee3c4f090bb8d736eab136aafc", + "1a3de2aaeaffd68a9fd6c7f6557b83f3", + "385c6e0ea29421dd81011a2934641e26", + "6cf96c285d1a2d4787f955dad715b08c", + "2d7f75dcd73b9528c8396279ff09ff3a", + "5a63cd1841e4ed470e4ca5ef845f2281", + "610d899ca945fbead33287d4335a8b32", + "6bafaad81fce37be46730187e78d8b11", + }, + { + // 32X16 + "290b23c9f5a1de7905bfa71a942da29b", + "701e7b82593c66da5052fc4b6afd79ce", + "4da828c5455cd246735a663fbb204989", + 
"e3fbeaf234efece8dbd752b77226200c", + "4d1d8c969f05155a7e7e84cf7aad021b", + "c22e4877c2c946d5bdc0d542e29e70cf", + "8ac1ce815e7780500f842b0beb0bb980", + "9fee2e2502b507f25bfad30a55b0b610", + "4ced9c212ec6f9956e27f68a91b59fef", + "4a7a0b93f138bb0863e4e465b01ec0b1", + }, + { + // 32X64 + "ad9cfc395a5c5644a21d958c7274ac14", + "f29d6d03c143ddf96fef04c19f2c8333", + "a8bdc852ef704dd4975c61893e8fbc3f", + "7d0bd7dea26226741dbca9a97f27fa74", + "45c27c5cca9a91b6ae8379feb0881c9f", + "8a0b78df1e001b85c874d686eac4aa1b", + "ce9fa75fac54a3f6c0cc3f2083b938f1", + "c0dca10d88762c954af18dc9e3791a39", + "61df229eddfccab913b8fda4bb02f9ac", + "4f4df6bc8d50a5600b573f0e44d70e66", + }, + { + // 64X32 + "db9d82921fd88b24fdff6f849f2f9c87", + "5ecc7fdc52d2f575ad4f2d0e9e6b1e11", + "b4581311a0a73d95dfac7f8f44591032", + "68bd283cfd1a125f6b2ee47cee874d36", + "804179f05c032908a5e36077bb87c994", + "fc5fd041a8ee779015394d0c066ee43c", + "68f5579ccadfe9a1baafb158334a3db2", + "fe237e45e215ab06d79046da9ad71e84", + "9a8a938a6824551bf7d21b8fd1d70ea1", + "eb7332f2017cd96882c76e7136aeaf53", + }, + { + // 4X16 + "7bafa307d507747b8132e7735b7f1c73", + "e58bc2d8213a97d1fea9cfb73d7a9633", + "435f8a8e8bbf14dbf2fe16b2be9e97aa", + "1d0e767b68d84acbfb50b7a04e633836", + "5f713bd7b324fe73bb7063e35ee14e5e", + "0dac4e1fa3d59814202715468c01ed56", + "47709d1db4a330c7a8900f450e6fddd1", + "258e0b930bb27db28f05da9cf7d1ee7c", + "36cf030fbae767912593efea045bfff5", + "248d7aceabb7499febae663fae41a920", + }, + { + // 16X4 + "04dde98e632670e393704742c89f9067", + "8c72543f1664651ae1fa08e2ac0adb9b", + "2354a2cdc2773aa2df8ab4010db1be39", + "6300ad3221c26da39b10e0e6d87ee3be", + "8ea30b661c6ba60b28d3167f19e449b8", + "fb6c1e4ff101a371cede63c2955cdb7e", + "a517c06433d6d7927b16a72184a23e92", + "393828be5d62ab6c48668bea5e2f801a", + "b1e510c542013eb9d6fb188dea2ce90a", + "569a8f2fe01679ca216535ecbcdccb62", + }, + { + // 8X32 + "9d541865c185ca7607852852613ac1fc", + "b96be67f08c6b5fa5ebd3411299c2f7c", + "75a2dcf50004b9d188849b048239767e", + 
"429492ff415c9fd9b050d73b2ad500f8", + "64b3606c1ccd036bd766bd5711392cf4", + "cb59844a0f01660ac955bae3511f1100", + "3e076155b7a70e8828618e3f33b51e3d", + "ed2d1f597ab7c50beff690f737cf9726", + "7909c6a26aaf20c59d996d3e5b5f9c29", + "965798807240c98c6f7cc9b457ed0773", + }, + { + // 32X8 + "36f391aa31619eec1f4d9ee95ea454cc", + "b82648f14eeba2527357cb50bc3223cb", + "7a7b2adf429125e8bee9d1d00a66e13f", + "4198e4d6ba503b7cc2d7e96bb845f661", + "96c160d2ec1be9fe0cdea9682f14d257", + "19a450bcebaa75afb4fc6bd1fd6434af", + "2bd2e35967d43d0ec1c6587a36f204d5", + "49799a99aa4ccfbd989bee92a99422f1", + "955530e99813812a74659edeac3f5475", + "f0316b84e378a19cd11b19a6e40b2914", + }, + { + // 16X64 + "8cba1b70a0bde29e8ef235cedc5faa7d", + "96d00ddc7537bf7f196006591b733b4e", + "cbf69d5d157c9f3355a4757b1d6e3414", + "3ac1f642019493dec1b737d7a3a1b4e5", + "35f9ee300d7fa3c97338e81a6f21dcd4", + "aae335442e77c8ebc280f16ea50ba9c7", + "a6140fdac2278644328be094d88731db", + "2df93621b6ff100f7008432d509f4161", + "c77bf5aee39e7ed4a3dd715f816f452a", + "02109bd63557d90225c32a8f1338258e", + }, + { + // 64X16 + "a5e2f9fb685d5f4a048e9a96affd25a4", + "1348f249690d9eefe09d9ad7ead2c801", + "525da4b187acd81b1ff1116b60461141", + "e99d072de858094c98b01bd4a6772634", + "873bfa9dc24693f19721f7c8d527f7d3", + "0acfc6507bd3468e9679efc127d6e4b9", + "57d03f8d079c7264854e22ac1157cfae", + "6c2c4036f70c7d957a9399b5436c0774", + "42b8e4a97b7f8416c72a5148c031c0b1", + "a38a2c5f79993dfae8530e9e25800893", + }, +}; + +} // namespace + +#define HIGHBD_INTRA_PRED_TEST(arch, tx_size, dc, dc_left, dc_top, dc_128, v, \ + h, paeth, smooth, smooth_v, smooth_h) \ + TEST(arch, DISABLED_##TestHighbdIntraPred_##tx_size) { \ + static const AvxHighbdPredFunc aom_intra_pred[] = { \ + dc, dc_left, dc_top, dc_128, v, h, paeth, smooth, smooth_v, smooth_h \ + }; \ + TestHighbdIntraPred(tx_size, aom_intra_pred, kHighbdSignatures[tx_size]); \ + } + +// ----------------------------------------------------------------------------- +// 4x4, 4x8, 4x16 + 
+HIGHBD_INTRA_PRED_TEST( + C, TX_4X4, aom_highbd_dc_predictor_4x4_c, + aom_highbd_dc_left_predictor_4x4_c, aom_highbd_dc_top_predictor_4x4_c, + aom_highbd_dc_128_predictor_4x4_c, aom_highbd_v_predictor_4x4_c, + aom_highbd_h_predictor_4x4_c, aom_highbd_paeth_predictor_4x4_c, + aom_highbd_smooth_predictor_4x4_c, aom_highbd_smooth_v_predictor_4x4_c, + aom_highbd_smooth_h_predictor_4x4_c) + +HIGHBD_INTRA_PRED_TEST( + C, TX_4X8, aom_highbd_dc_predictor_4x8_c, + aom_highbd_dc_left_predictor_4x8_c, aom_highbd_dc_top_predictor_4x8_c, + aom_highbd_dc_128_predictor_4x8_c, aom_highbd_v_predictor_4x8_c, + aom_highbd_h_predictor_4x8_c, aom_highbd_paeth_predictor_4x8_c, + aom_highbd_smooth_predictor_4x8_c, aom_highbd_smooth_v_predictor_4x8_c, + aom_highbd_smooth_h_predictor_4x8_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_4X16, aom_highbd_dc_predictor_4x16_c, + aom_highbd_dc_left_predictor_4x16_c, aom_highbd_dc_top_predictor_4x16_c, + aom_highbd_dc_128_predictor_4x16_c, aom_highbd_v_predictor_4x16_c, + aom_highbd_h_predictor_4x16_c, aom_highbd_paeth_predictor_4x16_c, + aom_highbd_smooth_predictor_4x16_c, aom_highbd_smooth_v_predictor_4x16_c, + aom_highbd_smooth_h_predictor_4x16_c) +#if HAVE_SSE2 +HIGHBD_INTRA_PRED_TEST(SSE2, TX_4X4, aom_highbd_dc_predictor_4x4_sse2, + aom_highbd_dc_left_predictor_4x4_sse2, + aom_highbd_dc_top_predictor_4x4_sse2, + aom_highbd_dc_128_predictor_4x4_sse2, + aom_highbd_v_predictor_4x4_sse2, + aom_highbd_h_predictor_4x4_sse2, nullptr, nullptr, + nullptr, nullptr) + +HIGHBD_INTRA_PRED_TEST(SSE2, TX_4X8, aom_highbd_dc_predictor_4x8_sse2, + aom_highbd_dc_left_predictor_4x8_sse2, + aom_highbd_dc_top_predictor_4x8_sse2, + aom_highbd_dc_128_predictor_4x8_sse2, + aom_highbd_v_predictor_4x8_sse2, + aom_highbd_h_predictor_4x8_sse2, nullptr, nullptr, + nullptr, nullptr) +#endif +#if HAVE_NEON +HIGHBD_INTRA_PRED_TEST(NEON, TX_4X4, aom_highbd_dc_predictor_4x4_neon, + aom_highbd_dc_left_predictor_4x4_neon, + aom_highbd_dc_top_predictor_4x4_neon, + 
aom_highbd_dc_128_predictor_4x4_neon, + aom_highbd_v_predictor_4x4_neon, + aom_highbd_h_predictor_4x4_neon, + aom_highbd_paeth_predictor_4x4_neon, + aom_highbd_smooth_predictor_4x4_neon, + aom_highbd_smooth_v_predictor_4x4_neon, + aom_highbd_smooth_h_predictor_4x4_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_4X8, aom_highbd_dc_predictor_4x8_neon, + aom_highbd_dc_left_predictor_4x8_neon, + aom_highbd_dc_top_predictor_4x8_neon, + aom_highbd_dc_128_predictor_4x8_neon, + aom_highbd_v_predictor_4x8_neon, + aom_highbd_h_predictor_4x8_neon, + aom_highbd_paeth_predictor_4x8_neon, + aom_highbd_smooth_predictor_4x8_neon, + aom_highbd_smooth_v_predictor_4x8_neon, + aom_highbd_smooth_h_predictor_4x8_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_4X16, aom_highbd_dc_predictor_4x16_neon, + aom_highbd_dc_left_predictor_4x16_neon, + aom_highbd_dc_top_predictor_4x16_neon, + aom_highbd_dc_128_predictor_4x16_neon, + aom_highbd_v_predictor_4x16_neon, + aom_highbd_h_predictor_4x16_neon, + aom_highbd_paeth_predictor_4x16_neon, + aom_highbd_smooth_predictor_4x16_neon, + aom_highbd_smooth_v_predictor_4x16_neon, + aom_highbd_smooth_h_predictor_4x16_neon) +#endif // HAVE_NEON + +// ----------------------------------------------------------------------------- +// 8x8, 8x4, 8x16, 8x32 + +HIGHBD_INTRA_PRED_TEST( + C, TX_8X8, aom_highbd_dc_predictor_8x8_c, + aom_highbd_dc_left_predictor_8x8_c, aom_highbd_dc_top_predictor_8x8_c, + aom_highbd_dc_128_predictor_8x8_c, aom_highbd_v_predictor_8x8_c, + aom_highbd_h_predictor_8x8_c, aom_highbd_paeth_predictor_8x8_c, + aom_highbd_smooth_predictor_8x8_c, aom_highbd_smooth_v_predictor_8x8_c, + aom_highbd_smooth_h_predictor_8x8_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_8X4, aom_highbd_dc_predictor_8x4_c, + aom_highbd_dc_left_predictor_8x4_c, aom_highbd_dc_top_predictor_8x4_c, + aom_highbd_dc_128_predictor_8x4_c, aom_highbd_v_predictor_8x4_c, + aom_highbd_h_predictor_8x4_c, aom_highbd_paeth_predictor_8x4_c, + aom_highbd_smooth_predictor_8x4_c, 
aom_highbd_smooth_v_predictor_8x4_c, + aom_highbd_smooth_h_predictor_8x4_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_8X16, aom_highbd_dc_predictor_8x16_c, + aom_highbd_dc_left_predictor_8x16_c, aom_highbd_dc_top_predictor_8x16_c, + aom_highbd_dc_128_predictor_8x16_c, aom_highbd_v_predictor_8x16_c, + aom_highbd_h_predictor_8x16_c, aom_highbd_paeth_predictor_8x16_c, + aom_highbd_smooth_predictor_8x16_c, aom_highbd_smooth_v_predictor_8x16_c, + aom_highbd_smooth_h_predictor_8x16_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_8X32, aom_highbd_dc_predictor_8x32_c, + aom_highbd_dc_left_predictor_8x32_c, aom_highbd_dc_top_predictor_8x32_c, + aom_highbd_dc_128_predictor_8x32_c, aom_highbd_v_predictor_8x32_c, + aom_highbd_h_predictor_8x32_c, aom_highbd_paeth_predictor_8x32_c, + aom_highbd_smooth_predictor_8x32_c, aom_highbd_smooth_v_predictor_8x32_c, + aom_highbd_smooth_h_predictor_8x32_c) + +#if HAVE_SSE2 +HIGHBD_INTRA_PRED_TEST(SSE2, TX_8X8, aom_highbd_dc_predictor_8x8_sse2, + aom_highbd_dc_left_predictor_8x8_sse2, + aom_highbd_dc_top_predictor_8x8_sse2, + aom_highbd_dc_128_predictor_8x8_sse2, + aom_highbd_v_predictor_8x8_sse2, + aom_highbd_h_predictor_8x8_sse2, nullptr, nullptr, + nullptr, nullptr) +HIGHBD_INTRA_PRED_TEST(SSE2, TX_8X4, aom_highbd_dc_predictor_8x4_sse2, + aom_highbd_dc_left_predictor_8x4_sse2, + aom_highbd_dc_top_predictor_8x4_sse2, + aom_highbd_dc_128_predictor_8x4_sse2, + aom_highbd_v_predictor_8x4_sse2, + aom_highbd_h_predictor_8x4_sse2, nullptr, nullptr, + nullptr, nullptr) +HIGHBD_INTRA_PRED_TEST(SSE2, TX_8X16, aom_highbd_dc_predictor_8x16_sse2, + aom_highbd_dc_left_predictor_8x16_sse2, + aom_highbd_dc_top_predictor_8x16_sse2, + aom_highbd_dc_128_predictor_8x16_sse2, + aom_highbd_v_predictor_8x16_sse2, + aom_highbd_h_predictor_8x16_sse2, nullptr, nullptr, + nullptr, nullptr) +#endif + +#if HAVE_SSSE3 +HIGHBD_INTRA_PRED_TEST(SSSE3, TX_8X8, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr) +#endif + +#if HAVE_NEON 
+HIGHBD_INTRA_PRED_TEST(NEON, TX_8X8, aom_highbd_dc_predictor_8x8_neon, + aom_highbd_dc_left_predictor_8x8_neon, + aom_highbd_dc_top_predictor_8x8_neon, + aom_highbd_dc_128_predictor_8x8_neon, + aom_highbd_v_predictor_8x8_neon, + aom_highbd_h_predictor_8x8_neon, + aom_highbd_paeth_predictor_8x8_neon, + aom_highbd_smooth_predictor_8x8_neon, + aom_highbd_smooth_v_predictor_8x8_neon, + aom_highbd_smooth_h_predictor_8x8_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_8X4, aom_highbd_dc_predictor_8x4_neon, + aom_highbd_dc_left_predictor_8x4_neon, + aom_highbd_dc_top_predictor_8x4_neon, + aom_highbd_dc_128_predictor_8x4_neon, + aom_highbd_v_predictor_8x4_neon, + aom_highbd_h_predictor_8x4_neon, + aom_highbd_paeth_predictor_8x4_neon, + aom_highbd_smooth_predictor_8x4_neon, + aom_highbd_smooth_v_predictor_8x4_neon, + aom_highbd_smooth_h_predictor_8x4_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_8X16, aom_highbd_dc_predictor_8x16_neon, + aom_highbd_dc_left_predictor_8x16_neon, + aom_highbd_dc_top_predictor_8x16_neon, + aom_highbd_dc_128_predictor_8x16_neon, + aom_highbd_v_predictor_8x16_neon, + aom_highbd_h_predictor_8x16_neon, + aom_highbd_paeth_predictor_8x16_neon, + aom_highbd_smooth_predictor_8x16_neon, + aom_highbd_smooth_v_predictor_8x16_neon, + aom_highbd_smooth_h_predictor_8x16_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_8X32, aom_highbd_dc_predictor_8x32_neon, + aom_highbd_dc_left_predictor_8x32_neon, + aom_highbd_dc_top_predictor_8x32_neon, + aom_highbd_dc_128_predictor_8x32_neon, + aom_highbd_v_predictor_8x32_neon, + aom_highbd_h_predictor_8x32_neon, + aom_highbd_paeth_predictor_8x32_neon, + aom_highbd_smooth_predictor_8x32_neon, + aom_highbd_smooth_v_predictor_8x32_neon, + aom_highbd_smooth_h_predictor_8x32_neon) +#endif // HAVE_NEON + +// ----------------------------------------------------------------------------- +// 16x16, 16x8, 16x32, 16x4, 16x64 + +HIGHBD_INTRA_PRED_TEST( + C, TX_16X16, aom_highbd_dc_predictor_16x16_c, + aom_highbd_dc_left_predictor_16x16_c, 
aom_highbd_dc_top_predictor_16x16_c, + aom_highbd_dc_128_predictor_16x16_c, aom_highbd_v_predictor_16x16_c, + aom_highbd_h_predictor_16x16_c, aom_highbd_paeth_predictor_16x16_c, + aom_highbd_smooth_predictor_16x16_c, aom_highbd_smooth_v_predictor_16x16_c, + aom_highbd_smooth_h_predictor_16x16_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_16X8, aom_highbd_dc_predictor_16x8_c, + aom_highbd_dc_left_predictor_16x8_c, aom_highbd_dc_top_predictor_16x8_c, + aom_highbd_dc_128_predictor_16x8_c, aom_highbd_v_predictor_16x8_c, + aom_highbd_h_predictor_16x8_c, aom_highbd_paeth_predictor_16x8_c, + aom_highbd_smooth_predictor_16x8_c, aom_highbd_smooth_v_predictor_16x8_c, + aom_highbd_smooth_h_predictor_16x8_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_16X32, aom_highbd_dc_predictor_16x32_c, + aom_highbd_dc_left_predictor_16x32_c, aom_highbd_dc_top_predictor_16x32_c, + aom_highbd_dc_128_predictor_16x32_c, aom_highbd_v_predictor_16x32_c, + aom_highbd_h_predictor_16x32_c, aom_highbd_paeth_predictor_16x32_c, + aom_highbd_smooth_predictor_16x32_c, aom_highbd_smooth_v_predictor_16x32_c, + aom_highbd_smooth_h_predictor_16x32_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_16X4, aom_highbd_dc_predictor_16x4_c, + aom_highbd_dc_left_predictor_16x4_c, aom_highbd_dc_top_predictor_16x4_c, + aom_highbd_dc_128_predictor_16x4_c, aom_highbd_v_predictor_16x4_c, + aom_highbd_h_predictor_16x4_c, aom_highbd_paeth_predictor_16x4_c, + aom_highbd_smooth_predictor_16x4_c, aom_highbd_smooth_v_predictor_16x4_c, + aom_highbd_smooth_h_predictor_16x4_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_16X64, aom_highbd_dc_predictor_16x64_c, + aom_highbd_dc_left_predictor_16x64_c, aom_highbd_dc_top_predictor_16x64_c, + aom_highbd_dc_128_predictor_16x64_c, aom_highbd_v_predictor_16x64_c, + aom_highbd_h_predictor_16x64_c, aom_highbd_paeth_predictor_16x64_c, + aom_highbd_smooth_predictor_16x64_c, aom_highbd_smooth_v_predictor_16x64_c, + aom_highbd_smooth_h_predictor_16x64_c) + +#if HAVE_SSE2 +HIGHBD_INTRA_PRED_TEST(SSE2, TX_16X16, 
aom_highbd_dc_predictor_16x16_sse2, + aom_highbd_dc_left_predictor_16x16_sse2, + aom_highbd_dc_top_predictor_16x16_sse2, + aom_highbd_dc_128_predictor_16x16_sse2, + aom_highbd_v_predictor_16x16_sse2, + aom_highbd_h_predictor_16x16_sse2, nullptr, nullptr, + nullptr, nullptr) +HIGHBD_INTRA_PRED_TEST(SSE2, TX_16X8, aom_highbd_dc_predictor_16x8_sse2, + aom_highbd_dc_left_predictor_16x8_sse2, + aom_highbd_dc_top_predictor_16x8_sse2, + aom_highbd_dc_128_predictor_16x8_sse2, + aom_highbd_v_predictor_16x8_sse2, + aom_highbd_h_predictor_16x8_sse2, nullptr, nullptr, + nullptr, nullptr) +HIGHBD_INTRA_PRED_TEST(SSE2, TX_16X32, aom_highbd_dc_predictor_16x32_sse2, + aom_highbd_dc_left_predictor_16x32_sse2, + aom_highbd_dc_top_predictor_16x32_sse2, + aom_highbd_dc_128_predictor_16x32_sse2, + aom_highbd_v_predictor_16x32_sse2, + aom_highbd_h_predictor_16x32_sse2, nullptr, nullptr, + nullptr, nullptr) +#endif + +#if HAVE_SSSE3 +HIGHBD_INTRA_PRED_TEST(SSSE3, TX_16X16, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr) +#endif + +#if HAVE_AVX2 +HIGHBD_INTRA_PRED_TEST(AVX2, TX_16X16, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr) + +HIGHBD_INTRA_PRED_TEST(AVX2, TX_16X8, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr) + +HIGHBD_INTRA_PRED_TEST(AVX2, TX_16X32, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr) +#endif + +#if HAVE_NEON +HIGHBD_INTRA_PRED_TEST(NEON, TX_16X16, aom_highbd_dc_predictor_16x16_neon, + aom_highbd_dc_left_predictor_16x16_neon, + aom_highbd_dc_top_predictor_16x16_neon, + aom_highbd_dc_128_predictor_16x16_neon, + aom_highbd_v_predictor_16x16_neon, + aom_highbd_h_predictor_16x16_neon, + aom_highbd_paeth_predictor_16x16_neon, + aom_highbd_smooth_predictor_16x16_neon, + aom_highbd_smooth_v_predictor_16x16_neon, + aom_highbd_smooth_h_predictor_16x16_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_16X8, 
aom_highbd_dc_predictor_16x8_neon, + aom_highbd_dc_left_predictor_16x8_neon, + aom_highbd_dc_top_predictor_16x8_neon, + aom_highbd_dc_128_predictor_16x8_neon, + aom_highbd_v_predictor_16x8_neon, + aom_highbd_h_predictor_16x8_neon, + aom_highbd_paeth_predictor_16x8_neon, + aom_highbd_smooth_predictor_16x8_neon, + aom_highbd_smooth_v_predictor_16x8_neon, + aom_highbd_smooth_h_predictor_16x8_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_16X32, aom_highbd_dc_predictor_16x32_neon, + aom_highbd_dc_left_predictor_16x32_neon, + aom_highbd_dc_top_predictor_16x32_neon, + aom_highbd_dc_128_predictor_16x32_neon, + aom_highbd_v_predictor_16x32_neon, + aom_highbd_h_predictor_16x32_neon, + aom_highbd_paeth_predictor_16x32_neon, + aom_highbd_smooth_predictor_16x32_neon, + aom_highbd_smooth_v_predictor_16x32_neon, + aom_highbd_smooth_h_predictor_16x32_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_16X4, aom_highbd_dc_predictor_16x4_neon, + aom_highbd_dc_left_predictor_16x4_neon, + aom_highbd_dc_top_predictor_16x4_neon, + aom_highbd_dc_128_predictor_16x4_neon, + aom_highbd_v_predictor_16x4_neon, + aom_highbd_h_predictor_16x4_neon, + aom_highbd_paeth_predictor_16x4_neon, + aom_highbd_smooth_predictor_16x4_neon, + aom_highbd_smooth_v_predictor_16x4_neon, + aom_highbd_smooth_h_predictor_16x4_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_16X64, aom_highbd_dc_predictor_16x64_neon, + aom_highbd_dc_left_predictor_16x64_neon, + aom_highbd_dc_top_predictor_16x64_neon, + aom_highbd_dc_128_predictor_16x64_neon, + aom_highbd_v_predictor_16x64_neon, + aom_highbd_h_predictor_16x64_neon, + aom_highbd_paeth_predictor_16x64_neon, + aom_highbd_smooth_predictor_16x64_neon, + aom_highbd_smooth_v_predictor_16x64_neon, + aom_highbd_smooth_h_predictor_16x64_neon) +#endif // HAVE_NEON + +// ----------------------------------------------------------------------------- +// 32x32, 32x16, 32x64, 32x8 + +HIGHBD_INTRA_PRED_TEST( + C, TX_32X32, aom_highbd_dc_predictor_32x32_c, + aom_highbd_dc_left_predictor_32x32_c, 
aom_highbd_dc_top_predictor_32x32_c, + aom_highbd_dc_128_predictor_32x32_c, aom_highbd_v_predictor_32x32_c, + aom_highbd_h_predictor_32x32_c, aom_highbd_paeth_predictor_32x32_c, + aom_highbd_smooth_predictor_32x32_c, aom_highbd_smooth_v_predictor_32x32_c, + aom_highbd_smooth_h_predictor_32x32_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_32X16, aom_highbd_dc_predictor_32x16_c, + aom_highbd_dc_left_predictor_32x16_c, aom_highbd_dc_top_predictor_32x16_c, + aom_highbd_dc_128_predictor_32x16_c, aom_highbd_v_predictor_32x16_c, + aom_highbd_h_predictor_32x16_c, aom_highbd_paeth_predictor_32x16_c, + aom_highbd_smooth_predictor_32x16_c, aom_highbd_smooth_v_predictor_32x16_c, + aom_highbd_smooth_h_predictor_32x16_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_32X64, aom_highbd_dc_predictor_32x64_c, + aom_highbd_dc_left_predictor_32x64_c, aom_highbd_dc_top_predictor_32x64_c, + aom_highbd_dc_128_predictor_32x64_c, aom_highbd_v_predictor_32x64_c, + aom_highbd_h_predictor_32x64_c, aom_highbd_paeth_predictor_32x64_c, + aom_highbd_smooth_predictor_32x64_c, aom_highbd_smooth_v_predictor_32x64_c, + aom_highbd_smooth_h_predictor_32x64_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_32X8, aom_highbd_dc_predictor_32x8_c, + aom_highbd_dc_left_predictor_32x8_c, aom_highbd_dc_top_predictor_32x8_c, + aom_highbd_dc_128_predictor_32x8_c, aom_highbd_v_predictor_32x8_c, + aom_highbd_h_predictor_32x8_c, aom_highbd_paeth_predictor_32x8_c, + aom_highbd_smooth_predictor_32x8_c, aom_highbd_smooth_v_predictor_32x8_c, + aom_highbd_smooth_h_predictor_32x8_c) + +#if HAVE_SSE2 +HIGHBD_INTRA_PRED_TEST(SSE2, TX_32X32, aom_highbd_dc_predictor_32x32_sse2, + aom_highbd_dc_left_predictor_32x32_sse2, + aom_highbd_dc_top_predictor_32x32_sse2, + aom_highbd_dc_128_predictor_32x32_sse2, + aom_highbd_v_predictor_32x32_sse2, + aom_highbd_h_predictor_32x32_sse2, nullptr, nullptr, + nullptr, nullptr) +HIGHBD_INTRA_PRED_TEST(SSE2, TX_32X16, aom_highbd_dc_predictor_32x16_sse2, + aom_highbd_dc_left_predictor_32x16_sse2, + 
aom_highbd_dc_top_predictor_32x16_sse2, + aom_highbd_dc_128_predictor_32x16_sse2, + aom_highbd_v_predictor_32x16_sse2, + aom_highbd_h_predictor_32x16_sse2, nullptr, nullptr, + nullptr, nullptr) +#endif + +#if HAVE_SSSE3 +HIGHBD_INTRA_PRED_TEST(SSSE3, TX_32X32, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr) +#endif + +#if HAVE_AVX2 +HIGHBD_INTRA_PRED_TEST(AVX2, TX_32X32, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr) + +HIGHBD_INTRA_PRED_TEST(AVX2, TX_32X16, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr) +#endif + +#if HAVE_NEON +HIGHBD_INTRA_PRED_TEST(NEON, TX_32X32, aom_highbd_dc_predictor_32x32_neon, + aom_highbd_dc_left_predictor_32x32_neon, + aom_highbd_dc_top_predictor_32x32_neon, + aom_highbd_dc_128_predictor_32x32_neon, + aom_highbd_v_predictor_32x32_neon, + aom_highbd_h_predictor_32x32_neon, + aom_highbd_paeth_predictor_32x32_neon, + aom_highbd_smooth_predictor_32x32_neon, + aom_highbd_smooth_v_predictor_32x32_neon, + aom_highbd_smooth_h_predictor_32x32_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_32X16, aom_highbd_dc_predictor_32x16_neon, + aom_highbd_dc_left_predictor_32x16_neon, + aom_highbd_dc_top_predictor_32x16_neon, + aom_highbd_dc_128_predictor_32x16_neon, + aom_highbd_v_predictor_32x16_neon, + aom_highbd_h_predictor_32x16_neon, + aom_highbd_paeth_predictor_32x16_neon, + aom_highbd_smooth_predictor_32x16_neon, + aom_highbd_smooth_v_predictor_32x16_neon, + aom_highbd_smooth_h_predictor_32x16_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_32X64, aom_highbd_dc_predictor_32x64_neon, + aom_highbd_dc_left_predictor_32x64_neon, + aom_highbd_dc_top_predictor_32x64_neon, + aom_highbd_dc_128_predictor_32x64_neon, + aom_highbd_v_predictor_32x64_neon, + aom_highbd_h_predictor_32x64_neon, + aom_highbd_paeth_predictor_32x64_neon, + aom_highbd_smooth_predictor_32x64_neon, + aom_highbd_smooth_v_predictor_32x64_neon, + 
aom_highbd_smooth_h_predictor_32x64_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_32X8, aom_highbd_dc_predictor_32x8_neon, + aom_highbd_dc_left_predictor_32x8_neon, + aom_highbd_dc_top_predictor_32x8_neon, + aom_highbd_dc_128_predictor_32x8_neon, + aom_highbd_v_predictor_32x8_neon, + aom_highbd_h_predictor_32x8_neon, + aom_highbd_paeth_predictor_32x8_neon, + aom_highbd_smooth_predictor_32x8_neon, + aom_highbd_smooth_v_predictor_32x8_neon, + aom_highbd_smooth_h_predictor_32x8_neon) +#endif // HAVE_NEON + +// ----------------------------------------------------------------------------- +// 64x64, 64x32, 64x16 + +HIGHBD_INTRA_PRED_TEST( + C, TX_64X64, aom_highbd_dc_predictor_64x64_c, + aom_highbd_dc_left_predictor_64x64_c, aom_highbd_dc_top_predictor_64x64_c, + aom_highbd_dc_128_predictor_64x64_c, aom_highbd_v_predictor_64x64_c, + aom_highbd_h_predictor_64x64_c, aom_highbd_paeth_predictor_64x64_c, + aom_highbd_smooth_predictor_64x64_c, aom_highbd_smooth_v_predictor_64x64_c, + aom_highbd_smooth_h_predictor_64x64_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_64X32, aom_highbd_dc_predictor_64x32_c, + aom_highbd_dc_left_predictor_64x32_c, aom_highbd_dc_top_predictor_64x32_c, + aom_highbd_dc_128_predictor_64x32_c, aom_highbd_v_predictor_64x32_c, + aom_highbd_h_predictor_64x32_c, aom_highbd_paeth_predictor_64x32_c, + aom_highbd_smooth_predictor_64x32_c, aom_highbd_smooth_v_predictor_64x32_c, + aom_highbd_smooth_h_predictor_64x32_c) +HIGHBD_INTRA_PRED_TEST( + C, TX_64X16, aom_highbd_dc_predictor_64x16_c, + aom_highbd_dc_left_predictor_64x16_c, aom_highbd_dc_top_predictor_64x16_c, + aom_highbd_dc_128_predictor_64x16_c, aom_highbd_v_predictor_64x16_c, + aom_highbd_h_predictor_64x16_c, aom_highbd_paeth_predictor_64x16_c, + aom_highbd_smooth_predictor_64x16_c, aom_highbd_smooth_v_predictor_64x16_c, + aom_highbd_smooth_h_predictor_64x16_c) + +#if HAVE_NEON +HIGHBD_INTRA_PRED_TEST(NEON, TX_64X64, aom_highbd_dc_predictor_64x64_neon, + aom_highbd_dc_left_predictor_64x64_neon, + 
aom_highbd_dc_top_predictor_64x64_neon, + aom_highbd_dc_128_predictor_64x64_neon, + aom_highbd_v_predictor_64x64_neon, + aom_highbd_h_predictor_64x64_neon, + aom_highbd_paeth_predictor_64x64_neon, + aom_highbd_smooth_predictor_64x64_neon, + aom_highbd_smooth_v_predictor_64x64_neon, + aom_highbd_smooth_h_predictor_64x64_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_64X32, aom_highbd_dc_predictor_64x32_neon, + aom_highbd_dc_left_predictor_64x32_neon, + aom_highbd_dc_top_predictor_64x32_neon, + aom_highbd_dc_128_predictor_64x32_neon, + aom_highbd_v_predictor_64x32_neon, + aom_highbd_h_predictor_64x32_neon, + aom_highbd_paeth_predictor_64x32_neon, + aom_highbd_smooth_predictor_64x32_neon, + aom_highbd_smooth_v_predictor_64x32_neon, + aom_highbd_smooth_h_predictor_64x32_neon) +HIGHBD_INTRA_PRED_TEST(NEON, TX_64X16, aom_highbd_dc_predictor_64x16_neon, + aom_highbd_dc_left_predictor_64x16_neon, + aom_highbd_dc_top_predictor_64x16_neon, + aom_highbd_dc_128_predictor_64x16_neon, + aom_highbd_v_predictor_64x16_neon, + aom_highbd_h_predictor_64x16_neon, + aom_highbd_paeth_predictor_64x16_neon, + aom_highbd_smooth_predictor_64x16_neon, + aom_highbd_smooth_v_predictor_64x16_neon, + aom_highbd_smooth_h_predictor_64x16_neon) +#endif // HAVE_NEON + +// ----------------------------------------------------------------------------- +#endif // CONFIG_AV1_HIGHBITDEPTH + +#include "test/test_libaom.cc" diff --git a/third_party/aom/test/test_libaom.cc b/third_party/aom/test/test_libaom.cc new file mode 100644 index 0000000000..fbd7f2e380 --- /dev/null +++ b/third_party/aom/test/test_libaom.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" + +#if !CONFIG_SHARED +#include <string.h> + +#include <string> + +#if AOM_ARCH_ARM +#include "aom_ports/arm.h" +#endif +#if AOM_ARCH_X86 || AOM_ARCH_X86_64 +#include "aom_ports/x86.h" +#endif + +extern "C" { +extern void av1_rtcd(); +extern void aom_dsp_rtcd(); +extern void aom_scale_rtcd(); +} + +#if AOM_ARCH_ARM || AOM_ARCH_X86 || AOM_ARCH_X86_64 +static void append_negative_gtest_filter(const char *str) { + std::string flag_value = GTEST_FLAG_GET(filter); + // Negative patterns begin with one '-' followed by a ':' separated list. + if (flag_value.find('-') == std::string::npos) flag_value += '-'; + // OPT.* matches TEST() functions + // OPT/* matches TEST_P() functions + // OPT_* matches tests which have been manually sharded. + // We do not match OPT* because of SSE/SSE2 collisions. 
+ const char *search_terminators = "./_"; + for (size_t pos = 0; pos < strlen(search_terminators); ++pos) { + flag_value += ":"; + flag_value += str; + flag_value += search_terminators[pos]; + flag_value += "*"; + } + GTEST_FLAG_SET(filter, flag_value); +} +#endif // AOM_ARCH_ARM || AOM_ARCH_X86 || AOM_ARCH_X86_64 +#endif // !CONFIG_SHARED + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + +#if !CONFIG_SHARED +#if AOM_ARCH_AARCH64 + const int caps = aom_arm_cpu_caps(); + if (!(caps & HAS_ARM_CRC32)) append_negative_gtest_filter("ARM_CRC32"); + if (!(caps & HAS_NEON_DOTPROD)) append_negative_gtest_filter("NEON_DOTPROD"); + if (!(caps & HAS_NEON_I8MM)) append_negative_gtest_filter("NEON_I8MM"); + if (!(caps & HAS_SVE)) append_negative_gtest_filter("SVE"); +#elif AOM_ARCH_ARM + const int caps = aom_arm_cpu_caps(); + if (!(caps & HAS_NEON)) append_negative_gtest_filter("NEON"); +#endif // AOM_ARCH_ARM + +#if AOM_ARCH_X86 || AOM_ARCH_X86_64 + const int simd_caps = x86_simd_caps(); + if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter("MMX"); + if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter("SSE"); + if (!(simd_caps & HAS_SSE2)) append_negative_gtest_filter("SSE2"); + if (!(simd_caps & HAS_SSE3)) append_negative_gtest_filter("SSE3"); + if (!(simd_caps & HAS_SSSE3)) append_negative_gtest_filter("SSSE3"); + if (!(simd_caps & HAS_SSE4_1)) append_negative_gtest_filter("SSE4_1"); + if (!(simd_caps & HAS_SSE4_2)) append_negative_gtest_filter("SSE4_2"); + if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter("AVX"); + if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter("AVX2"); +#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64 + + // Shared library builds don't support whitebox tests that exercise internal + // symbols. 
+  av1_rtcd();
+  aom_dsp_rtcd();
+  aom_scale_rtcd();
+#endif  // !CONFIG_SHARED
+
+  return RUN_ALL_TESTS();
+}
diff --git a/third_party/aom/test/test_runner.cmake b/third_party/aom/test/test_runner.cmake
new file mode 100644
index 0000000000..f0648d16be
--- /dev/null
+++ b/third_party/aom/test/test_runner.cmake
@@ -0,0 +1,28 @@
+#
+# Copyright (c) 2017, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and the
+# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
+# not distributed with this source code in the LICENSE file, you can obtain it
+# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
+# License 1.0 was not distributed with this source code in the PATENTS file, you
+# can obtain it at www.aomedia.org/license/patent.
+#
+
+# Runs one shard of the test binary. Expects the caller to define
+# GTEST_SHARD_INDEX, GTEST_TOTAL_SHARDS and TEST_LIBAOM (the executable to
+# run); fails the script when the executable exits with a non-zero code.
+if(NOT GTEST_TOTAL_SHARDS
+   OR "${GTEST_SHARD_INDEX}" STREQUAL ""
+   OR NOT TEST_LIBAOM)
+  message(
+    FATAL_ERROR
+      "The variables GTEST_SHARD_INDEX, GTEST_TOTAL_SHARDS and TEST_LIBAOM
+          must be defined.")
+endif()
+
+# Export the shard settings through the environment so that the process started
+# by execute_process() inherits them. The form is set(ENV{name} value); writing
+# set($ENV{name} value) would expand the (usually empty) environment variable
+# and use the result as an ordinary CMake variable name, leaving the
+# environment untouched.
+set(ENV{GTEST_SHARD_INDEX} "${GTEST_SHARD_INDEX}")
+set(ENV{GTEST_TOTAL_SHARDS} "${GTEST_TOTAL_SHARDS}")
+execute_process(COMMAND ${TEST_LIBAOM} RESULT_VARIABLE test_result)
+set(test_message "Test shard ${GTEST_SHARD_INDEX}/${GTEST_TOTAL_SHARDS} result")
+message("${test_message}: ${test_result}")
+
+if(NOT "${test_result}" STREQUAL "0")
+  message(FATAL_ERROR "${test_message}: FAILED, non-zero exit code.")
+endif()
diff --git a/third_party/aom/test/test_vector_test.cc b/third_party/aom/test/test_vector_test.cc
new file mode 100644
index 0000000000..39414e32e4
--- /dev/null
+++ b/third_party/aom/test/test_vector_test.cc
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0.
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <cstdio> +#include <cstdlib> +#include <memory> +#include <set> +#include <string> +#include <tuple> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "common/tools_common.h" +#include "config/aom_config.h" +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/ivf_video_source.h" +#include "test/md5_helper.h" +#include "test/test_vectors.h" +#include "test/util.h" +#if CONFIG_WEBM_IO +#include "test/webm_video_source.h" +#endif + +namespace { + +const int kThreads = 0; +const int kFileName = 1; +const int kRowMT = 2; + +typedef std::tuple<int, const char *, int> DecodeParam; + +class TestVectorTest : public ::libaom_test::DecoderTest, + public ::libaom_test::CodecTestWithParam<DecodeParam> { + protected: + TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(nullptr) {} + + ~TestVectorTest() override { + if (md5_file_) fclose(md5_file_); + } + + void OpenMD5File(const std::string &md5_file_name_) { + md5_file_ = libaom_test::OpenTestDataFile(md5_file_name_); + ASSERT_NE(md5_file_, nullptr) + << "Md5 file open failed. Filename: " << md5_file_name_; + } + + void PreDecodeFrameHook(const libaom_test::CompressedVideoSource &video, + libaom_test::Decoder *decoder) override { + if (video.frame_number() == 0) decoder->Control(AV1D_SET_ROW_MT, row_mt_); + } + + void DecompressedFrameHook(const aom_image_t &img, + const unsigned int frame_number) override { + ASSERT_NE(md5_file_, nullptr); + char expected_md5[33]; + char junk[128]; + + // Read correct md5 checksums. 
+ const int res = fscanf(md5_file_, "%s %s", expected_md5, junk); + ASSERT_NE(res, EOF) << "Read md5 data failed"; + expected_md5[32] = '\0'; + + ::libaom_test::MD5 md5_res; +#if FORCE_HIGHBITDEPTH_DECODING + const aom_img_fmt_t shifted_fmt = + (aom_img_fmt)(img.fmt & ~AOM_IMG_FMT_HIGHBITDEPTH); + if (img.bit_depth == 8 && shifted_fmt != img.fmt) { + aom_image_t *img_shifted = + aom_img_alloc(nullptr, shifted_fmt, img.d_w, img.d_h, 16); + img_shifted->bit_depth = img.bit_depth; + img_shifted->monochrome = img.monochrome; + aom_img_downshift(img_shifted, &img, 0); + md5_res.Add(img_shifted); + aom_img_free(img_shifted); + } else { +#endif + md5_res.Add(&img); +#if FORCE_HIGHBITDEPTH_DECODING + } +#endif + + const char *actual_md5 = md5_res.Get(); + // Check md5 match. + ASSERT_STREQ(expected_md5, actual_md5) + << "Md5 checksums don't match: frame number = " << frame_number; + } + + unsigned int row_mt_; + + private: + FILE *md5_file_; +}; + +// This test runs through the whole set of test vectors, and decodes them. +// The md5 checksums are computed for each frame in the video file. If md5 +// checksums match the correct md5 data, then the test is passed. Otherwise, +// the test failed. +TEST_P(TestVectorTest, MD5Match) { + const DecodeParam input = GET_PARAM(1); + const std::string filename = std::get<kFileName>(input); + aom_codec_flags_t flags = 0; + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + char str[256]; + + cfg.threads = std::get<kThreads>(input); + row_mt_ = std::get<kRowMT>(input); + + snprintf(str, sizeof(str) / sizeof(str[0]) - 1, "file: %s threads: %d", + filename.c_str(), cfg.threads); + SCOPED_TRACE(str); + + // Open compressed video file. 
+ std::unique_ptr<libaom_test::CompressedVideoSource> video; + if (filename.substr(filename.length() - 3, 3) == "ivf") { + video.reset(new libaom_test::IVFVideoSource(filename)); + } else if (filename.substr(filename.length() - 4, 4) == "webm" || + filename.substr(filename.length() - 3, 3) == "mkv") { +#if CONFIG_WEBM_IO + video.reset(new libaom_test::WebMVideoSource(filename)); +#else + fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n", + filename.c_str()); + return; +#endif + } + ASSERT_NE(video, nullptr); + video->Init(); + + // Construct md5 file name. + const std::string md5_filename = filename + ".md5"; + OpenMD5File(md5_filename); + + // Set decode config and flags. + cfg.allow_lowbitdepth = !FORCE_HIGHBITDEPTH_DECODING; + set_cfg(cfg); + set_flags(flags); + + // Decode frame, and check the md5 matching. + ASSERT_NO_FATAL_FAILURE(RunLoop(video.get(), cfg)); +} + +#if CONFIG_AV1_DECODER +AV1_INSTANTIATE_TEST_SUITE( + TestVectorTest, + ::testing::Combine(::testing::Values(1), // Single thread. + ::testing::ValuesIn(libaom_test::kAV1TestVectors, + libaom_test::kAV1TestVectors + + libaom_test::kNumAV1TestVectors), + ::testing::Values(0))); + +// Test AV1 decode in with different numbers of threads. +INSTANTIATE_TEST_SUITE_P( + AV1MultiThreaded, TestVectorTest, + ::testing::Combine( + ::testing::Values( + static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)), + ::testing::Combine( + ::testing::Range(2, 9), // With 2 ~ 8 threads. + ::testing::ValuesIn(libaom_test::kAV1TestVectors, + libaom_test::kAV1TestVectors + + libaom_test::kNumAV1TestVectors), + ::testing::Range(0, 2)))); + +#endif // CONFIG_AV1_DECODER + +} // namespace diff --git a/third_party/aom/test/test_vectors.cc b/third_party/aom/test/test_vectors.cc new file mode 100644 index 0000000000..09736d1ed8 --- /dev/null +++ b/third_party/aom/test/test_vectors.cc @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "test/test_vectors.h" + +namespace libaom_test { + +#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0])) + +#if CONFIG_AV1_DECODER +const char *const kAV1TestVectors[] = { + "av1-1-b8-00-quantizer-00.ivf", + "av1-1-b8-00-quantizer-01.ivf", + "av1-1-b8-00-quantizer-02.ivf", + "av1-1-b8-00-quantizer-03.ivf", + "av1-1-b8-00-quantizer-04.ivf", + "av1-1-b8-00-quantizer-05.ivf", + "av1-1-b8-00-quantizer-06.ivf", + "av1-1-b8-00-quantizer-07.ivf", + "av1-1-b8-00-quantizer-08.ivf", + "av1-1-b8-00-quantizer-09.ivf", + "av1-1-b8-00-quantizer-10.ivf", + "av1-1-b8-00-quantizer-11.ivf", + "av1-1-b8-00-quantizer-12.ivf", + "av1-1-b8-00-quantizer-13.ivf", + "av1-1-b8-00-quantizer-14.ivf", + "av1-1-b8-00-quantizer-15.ivf", + "av1-1-b8-00-quantizer-16.ivf", + "av1-1-b8-00-quantizer-17.ivf", + "av1-1-b8-00-quantizer-18.ivf", + "av1-1-b8-00-quantizer-19.ivf", + "av1-1-b8-00-quantizer-20.ivf", + "av1-1-b8-00-quantizer-21.ivf", + "av1-1-b8-00-quantizer-22.ivf", + "av1-1-b8-00-quantizer-23.ivf", + "av1-1-b8-00-quantizer-24.ivf", + "av1-1-b8-00-quantizer-25.ivf", + "av1-1-b8-00-quantizer-26.ivf", + "av1-1-b8-00-quantizer-27.ivf", + "av1-1-b8-00-quantizer-28.ivf", + "av1-1-b8-00-quantizer-29.ivf", + "av1-1-b8-00-quantizer-30.ivf", + "av1-1-b8-00-quantizer-31.ivf", + "av1-1-b8-00-quantizer-32.ivf", + "av1-1-b8-00-quantizer-33.ivf", + "av1-1-b8-00-quantizer-34.ivf", + "av1-1-b8-00-quantizer-35.ivf", + "av1-1-b8-00-quantizer-36.ivf", + "av1-1-b8-00-quantizer-37.ivf", + 
"av1-1-b8-00-quantizer-38.ivf", + "av1-1-b8-00-quantizer-39.ivf", + "av1-1-b8-00-quantizer-40.ivf", + "av1-1-b8-00-quantizer-41.ivf", + "av1-1-b8-00-quantizer-42.ivf", + "av1-1-b8-00-quantizer-43.ivf", + "av1-1-b8-00-quantizer-44.ivf", + "av1-1-b8-00-quantizer-45.ivf", + "av1-1-b8-00-quantizer-46.ivf", + "av1-1-b8-00-quantizer-47.ivf", + "av1-1-b8-00-quantizer-48.ivf", + "av1-1-b8-00-quantizer-49.ivf", + "av1-1-b8-00-quantizer-50.ivf", + "av1-1-b8-00-quantizer-51.ivf", + "av1-1-b8-00-quantizer-52.ivf", + "av1-1-b8-00-quantizer-53.ivf", + "av1-1-b8-00-quantizer-54.ivf", + "av1-1-b8-00-quantizer-55.ivf", + "av1-1-b8-00-quantizer-56.ivf", + "av1-1-b8-00-quantizer-57.ivf", + "av1-1-b8-00-quantizer-58.ivf", + "av1-1-b8-00-quantizer-59.ivf", + "av1-1-b8-00-quantizer-60.ivf", + "av1-1-b8-00-quantizer-61.ivf", + "av1-1-b8-00-quantizer-62.ivf", + "av1-1-b8-00-quantizer-63.ivf", +#if CONFIG_AV1_HIGHBITDEPTH + "av1-1-b10-00-quantizer-00.ivf", + "av1-1-b10-00-quantizer-01.ivf", + "av1-1-b10-00-quantizer-02.ivf", + "av1-1-b10-00-quantizer-03.ivf", + "av1-1-b10-00-quantizer-04.ivf", + "av1-1-b10-00-quantizer-05.ivf", + "av1-1-b10-00-quantizer-06.ivf", + "av1-1-b10-00-quantizer-07.ivf", + "av1-1-b10-00-quantizer-08.ivf", + "av1-1-b10-00-quantizer-09.ivf", + "av1-1-b10-00-quantizer-10.ivf", + "av1-1-b10-00-quantizer-11.ivf", + "av1-1-b10-00-quantizer-12.ivf", + "av1-1-b10-00-quantizer-13.ivf", + "av1-1-b10-00-quantizer-14.ivf", + "av1-1-b10-00-quantizer-15.ivf", + "av1-1-b10-00-quantizer-16.ivf", + "av1-1-b10-00-quantizer-17.ivf", + "av1-1-b10-00-quantizer-18.ivf", + "av1-1-b10-00-quantizer-19.ivf", + "av1-1-b10-00-quantizer-20.ivf", + "av1-1-b10-00-quantizer-21.ivf", + "av1-1-b10-00-quantizer-22.ivf", + "av1-1-b10-00-quantizer-23.ivf", + "av1-1-b10-00-quantizer-24.ivf", + "av1-1-b10-00-quantizer-25.ivf", + "av1-1-b10-00-quantizer-26.ivf", + "av1-1-b10-00-quantizer-27.ivf", + "av1-1-b10-00-quantizer-28.ivf", + "av1-1-b10-00-quantizer-29.ivf", + "av1-1-b10-00-quantizer-30.ivf", + 
"av1-1-b10-00-quantizer-31.ivf", + "av1-1-b10-00-quantizer-32.ivf", + "av1-1-b10-00-quantizer-33.ivf", + "av1-1-b10-00-quantizer-34.ivf", + "av1-1-b10-00-quantizer-35.ivf", + "av1-1-b10-00-quantizer-36.ivf", + "av1-1-b10-00-quantizer-37.ivf", + "av1-1-b10-00-quantizer-38.ivf", + "av1-1-b10-00-quantizer-39.ivf", + "av1-1-b10-00-quantizer-40.ivf", + "av1-1-b10-00-quantizer-41.ivf", + "av1-1-b10-00-quantizer-42.ivf", + "av1-1-b10-00-quantizer-43.ivf", + "av1-1-b10-00-quantizer-44.ivf", + "av1-1-b10-00-quantizer-45.ivf", + "av1-1-b10-00-quantizer-46.ivf", + "av1-1-b10-00-quantizer-47.ivf", + "av1-1-b10-00-quantizer-48.ivf", + "av1-1-b10-00-quantizer-49.ivf", + "av1-1-b10-00-quantizer-50.ivf", + "av1-1-b10-00-quantizer-51.ivf", + "av1-1-b10-00-quantizer-52.ivf", + "av1-1-b10-00-quantizer-53.ivf", + "av1-1-b10-00-quantizer-54.ivf", + "av1-1-b10-00-quantizer-55.ivf", + "av1-1-b10-00-quantizer-56.ivf", + "av1-1-b10-00-quantizer-57.ivf", + "av1-1-b10-00-quantizer-58.ivf", + "av1-1-b10-00-quantizer-59.ivf", + "av1-1-b10-00-quantizer-60.ivf", + "av1-1-b10-00-quantizer-61.ivf", + "av1-1-b10-00-quantizer-62.ivf", + "av1-1-b10-00-quantizer-63.ivf", + "av1-1-b10-23-film_grain-50.ivf", + "av1-1-b10-24-monochrome.ivf", +#endif // CONFIG_AV1_HIGHBITDEPTH + "av1-1-b8-01-size-16x16.ivf", + "av1-1-b8-01-size-16x18.ivf", + "av1-1-b8-01-size-16x32.ivf", + "av1-1-b8-01-size-16x34.ivf", + "av1-1-b8-01-size-16x64.ivf", + "av1-1-b8-01-size-16x66.ivf", + "av1-1-b8-01-size-18x16.ivf", + "av1-1-b8-01-size-18x18.ivf", + "av1-1-b8-01-size-18x32.ivf", + "av1-1-b8-01-size-18x34.ivf", + "av1-1-b8-01-size-18x64.ivf", + "av1-1-b8-01-size-18x66.ivf", + "av1-1-b8-01-size-196x196.ivf", + "av1-1-b8-01-size-196x198.ivf", + "av1-1-b8-01-size-196x200.ivf", + "av1-1-b8-01-size-196x202.ivf", + "av1-1-b8-01-size-196x208.ivf", + "av1-1-b8-01-size-196x210.ivf", + "av1-1-b8-01-size-196x224.ivf", + "av1-1-b8-01-size-196x226.ivf", + "av1-1-b8-01-size-198x196.ivf", + "av1-1-b8-01-size-198x198.ivf", + 
"av1-1-b8-01-size-198x200.ivf", + "av1-1-b8-01-size-198x202.ivf", + "av1-1-b8-01-size-198x208.ivf", + "av1-1-b8-01-size-198x210.ivf", + "av1-1-b8-01-size-198x224.ivf", + "av1-1-b8-01-size-198x226.ivf", + "av1-1-b8-01-size-200x196.ivf", + "av1-1-b8-01-size-200x198.ivf", + "av1-1-b8-01-size-200x200.ivf", + "av1-1-b8-01-size-200x202.ivf", + "av1-1-b8-01-size-200x208.ivf", + "av1-1-b8-01-size-200x210.ivf", + "av1-1-b8-01-size-200x224.ivf", + "av1-1-b8-01-size-200x226.ivf", + "av1-1-b8-01-size-202x196.ivf", + "av1-1-b8-01-size-202x198.ivf", + "av1-1-b8-01-size-202x200.ivf", + "av1-1-b8-01-size-202x202.ivf", + "av1-1-b8-01-size-202x208.ivf", + "av1-1-b8-01-size-202x210.ivf", + "av1-1-b8-01-size-202x224.ivf", + "av1-1-b8-01-size-202x226.ivf", + "av1-1-b8-01-size-208x196.ivf", + "av1-1-b8-01-size-208x198.ivf", + "av1-1-b8-01-size-208x200.ivf", + "av1-1-b8-01-size-208x202.ivf", + "av1-1-b8-01-size-208x208.ivf", + "av1-1-b8-01-size-208x210.ivf", + "av1-1-b8-01-size-208x224.ivf", + "av1-1-b8-01-size-208x226.ivf", + "av1-1-b8-01-size-210x196.ivf", + "av1-1-b8-01-size-210x198.ivf", + "av1-1-b8-01-size-210x200.ivf", + "av1-1-b8-01-size-210x202.ivf", + "av1-1-b8-01-size-210x208.ivf", + "av1-1-b8-01-size-210x210.ivf", + "av1-1-b8-01-size-210x224.ivf", + "av1-1-b8-01-size-210x226.ivf", + "av1-1-b8-01-size-224x196.ivf", + "av1-1-b8-01-size-224x198.ivf", + "av1-1-b8-01-size-224x200.ivf", + "av1-1-b8-01-size-224x202.ivf", + "av1-1-b8-01-size-224x208.ivf", + "av1-1-b8-01-size-224x210.ivf", + "av1-1-b8-01-size-224x224.ivf", + "av1-1-b8-01-size-224x226.ivf", + "av1-1-b8-01-size-226x196.ivf", + "av1-1-b8-01-size-226x198.ivf", + "av1-1-b8-01-size-226x200.ivf", + "av1-1-b8-01-size-226x202.ivf", + "av1-1-b8-01-size-226x208.ivf", + "av1-1-b8-01-size-226x210.ivf", + "av1-1-b8-01-size-226x224.ivf", + "av1-1-b8-01-size-226x226.ivf", + "av1-1-b8-01-size-32x16.ivf", + "av1-1-b8-01-size-32x18.ivf", + "av1-1-b8-01-size-32x32.ivf", + "av1-1-b8-01-size-32x34.ivf", + "av1-1-b8-01-size-32x64.ivf", + 
"av1-1-b8-01-size-32x66.ivf", + "av1-1-b8-01-size-34x16.ivf", + "av1-1-b8-01-size-34x18.ivf", + "av1-1-b8-01-size-34x32.ivf", + "av1-1-b8-01-size-34x34.ivf", + "av1-1-b8-01-size-34x64.ivf", + "av1-1-b8-01-size-34x66.ivf", + "av1-1-b8-01-size-64x16.ivf", + "av1-1-b8-01-size-64x18.ivf", + "av1-1-b8-01-size-64x32.ivf", + "av1-1-b8-01-size-64x34.ivf", + "av1-1-b8-01-size-64x64.ivf", + "av1-1-b8-01-size-64x66.ivf", + "av1-1-b8-01-size-66x16.ivf", + "av1-1-b8-01-size-66x18.ivf", + "av1-1-b8-01-size-66x32.ivf", + "av1-1-b8-01-size-66x34.ivf", + "av1-1-b8-01-size-66x64.ivf", + "av1-1-b8-01-size-66x66.ivf", + "av1-1-b8-02-allintra.ivf", + "av1-1-b8-03-sizedown.mkv", + "av1-1-b8-03-sizeup.mkv", + "av1-1-b8-04-cdfupdate.ivf", + "av1-1-b8-05-mv.ivf", + "av1-1-b8-06-mfmv.ivf", + "av1-1-b8-16-intra_only-intrabc-extreme-dv.ivf", + "av1-1-b8-22-svc-L1T2.ivf", + "av1-1-b8-22-svc-L2T1.ivf", + "av1-1-b8-22-svc-L2T2.ivf", + "av1-1-b8-23-film_grain-50.ivf", + "av1-1-b8-24-monochrome.ivf" +}; +const int kNumAV1TestVectors = NELEMENTS(kAV1TestVectors); +#endif // CONFIG_AV1_DECODER + +} // namespace libaom_test diff --git a/third_party/aom/test/test_vectors.h b/third_party/aom/test/test_vectors.h new file mode 100644 index 0000000000..be37f6e377 --- /dev/null +++ b/third_party/aom/test/test_vectors.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#ifndef AOM_TEST_TEST_VECTORS_H_ +#define AOM_TEST_TEST_VECTORS_H_ + +#include "config/aom_config.h" + +namespace libaom_test { + +#if CONFIG_AV1_DECODER +extern const int kNumAV1TestVectors; +extern const char *const kAV1TestVectors[]; +#endif + +} // namespace libaom_test + +#endif // AOM_TEST_TEST_VECTORS_H_ diff --git a/third_party/aom/test/tile_config_test.cc b/third_party/aom/test/tile_config_test.cc new file mode 100644 index 0000000000..e2ac59284b --- /dev/null +++ b/third_party/aom/test/tile_config_test.cc @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2020, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "aom/aom_codec.h" +#include "aom_dsp/aom_dsp_common.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/y4m_video_source.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { +typedef struct { + // Superblock size + const unsigned int sb_size; + // log2(number of tile rows) + const unsigned int tile_rows; + // log2(number of tile columns) + const unsigned int tile_cols; +} uniformTileConfigParam; + +const libaom_test::TestMode kTestModeParams[] = +#if CONFIG_REALTIME_ONLY + { ::libaom_test::kRealTime }; +#else + { ::libaom_test::kRealTime, ::libaom_test::kOnePassGood, + ::libaom_test::kTwoPassGood }; +#endif + +static const uniformTileConfigParam uniformTileConfigParams[] = { + { 128, 0, 0 }, { 128, 0, 2 }, { 128, 2, 0 }, { 128, 1, 2 }, { 128, 2, 2 }, + { 128, 3, 2 }, { 64, 0, 0 }, { 64, 0, 2 }, { 64, 2, 0 }, { 64, 1, 2 }, + { 64, 2, 2 }, { 64, 3, 3 }, { 64, 4, 4 } +}; + +typedef struct { + // Superblock size + const unsigned int sb_size; + // number of tile widths + const unsigned int tile_width_count; + // list of tile widths + int tile_widths[AOM_MAX_TILE_COLS]; + // number of tile heights + const unsigned int tile_height_count; + // list of tile heights + int tile_heights[AOM_MAX_TILE_ROWS]; +} nonUniformTileConfigParam; + +const nonUniformTileConfigParam nonUniformTileConfigParams[] = { + { 64, 1, { 3 }, 1, { 3 } }, { 64, 2, { 1, 2 }, 2, { 1, 2 } }, + { 64, 3, { 2, 3, 4 }, 2, { 2, 3 } }, { 128, 1, { 3 }, 1, { 3 } }, + { 128, 2, { 1, 2 }, 2, { 1, 2 } }, { 128, 3, { 2, 3, 4 }, 2, { 2, 3 } }, +}; + +// Find smallest k>=0 such that (blk_size << k) >= target +static INLINE int tile_log2(int blk_size, int target) { + int k; + for (k = 0; (blk_size << k) < target; k++) { + } + return k; +} + +// This class is used to validate tile configuration for uniform spacing. 
+class UniformTileConfigTestLarge
+    : public ::libaom_test::CodecTestWith3Params<
+          libaom_test::TestMode, uniformTileConfigParam, aom_rc_mode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  // The log2 limits start from AOM_MAX_TILE_COLS / AOM_MAX_TILE_ROWS; they are
+  // plain (non-const) members, so a test body may overwrite them later.
+  UniformTileConfigTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        tile_config_param_(GET_PARAM(2)),
+        max_tile_cols_log2_(tile_log2(1, AOM_MAX_TILE_COLS)),
+        max_tile_rows_log2_(tile_log2(1, AOM_MAX_TILE_ROWS)),
+        tile_config_violated_(false), end_usage_check_(GET_PARAM(3)) {}
+  ~UniformTileConfigTestLarge() override = default;
+
+  // Configures a single-threaded encode with a 1/30 timebase, 19 frames of
+  // lag and the rate-control mode under test.
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational kTimebase = { 1, 30 };
+    cfg_.g_timebase = kTimebase;
+    cfg_.g_threads = 1;
+    cfg_.g_lag_in_frames = 19;
+    cfg_.rc_end_usage = end_usage_check_;
+  }
+
+  bool DoDecode() const override { return true; }
+
+  // Applies the tile and superblock controls once, on the first frame.
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() != 0) return;
+
+    encoder->Control(AV1E_SET_TILE_COLUMNS, tile_config_param_.tile_cols);
+    encoder->Control(AV1E_SET_TILE_ROWS, tile_config_param_.tile_rows);
+    encoder->Control(AOME_SET_CPUUSED, 5);
+    encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+    // Any sb_size other than 64 selects the 128x128 superblock.
+    const auto superblock = (tile_config_param_.sb_size == 64)
+                                ? AOM_SUPERBLOCK_SIZE_64X64
+                                : AOM_SUPERBLOCK_SIZE_128X128;
+    encoder->Control(AV1E_SET_SUPERBLOCK_SIZE, superblock);
+  }
+
+  // After a successful decode, compares the tile counts obtained through
+  // AOMD_GET_TILE_INFO with the requested counts (capped by the log2 limits)
+  // and latches tile_config_violated_ on a mismatch.
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (res_dec != AOM_CODEC_OK) return false;
+
+    aom_codec_ctx_t *const dec_ctx = decoder->GetDecoder();
+    aom_tile_info tile_info;
+    AOM_CODEC_CONTROL_TYPECHECKED(dec_ctx, AOMD_GET_TILE_INFO, &tile_info);
+
+    // tile_cols / tile_rows hold log2 values, hence the shifts.
+    const int requested_cols =
+        1 << static_cast<int>(tile_config_param_.tile_cols);
+    const int requested_rows =
+        1 << static_cast<int>(tile_config_param_.tile_rows);
+    const int expected_cols = AOMMIN(requested_cols, 1 << max_tile_cols_log2_);
+    const int expected_rows = AOMMIN(requested_rows, 1 << max_tile_rows_log2_);
+
+    const bool matches = tile_info.tile_columns == expected_cols &&
+                         tile_info.tile_rows == expected_rows;
+    if (!matches) tile_config_violated_ = true;
+    return true;
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  const uniformTileConfigParam tile_config_param_;
+  int max_tile_cols_log2_;
+  int max_tile_rows_log2_;
+  bool tile_config_violated_;
+  aom_rc_mode end_usage_check_;
+};
+
+// This class is used to validate tile configuration for non uniform spacing.
+class NonUniformTileConfigTestLarge
+    : public ::libaom_test::CodecTestWith3Params<
+          libaom_test::TestMode, nonUniformTileConfigParam, aom_rc_mode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  NonUniformTileConfigTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        tile_config_param_(GET_PARAM(2)), tile_config_violated_(false),
+        rc_end_usage_(GET_PARAM(3)) {}
+  ~NonUniformTileConfigTestLarge() override = default;
+
+  // Configures a single-threaded encode (1/30 timebase, 35 frames of lag,
+  // target bitrate 1000) and copies the explicit tile width/height lists of
+  // the test parameter into the encoder configuration.
+  void SetUp() override {
+    InitializeConfig(encoding_mode_);
+    const aom_rational kTimebase = { 1, 30 };
+    cfg_.g_timebase = kTimebase;
+    cfg_.rc_end_usage = rc_end_usage_;
+    cfg_.g_threads = 1;
+    cfg_.g_lag_in_frames = 35;
+    cfg_.rc_target_bitrate = 1000;
+
+    // Only the first tile_*_count entries of each list are copied.
+    const unsigned int width_count = tile_config_param_.tile_width_count;
+    cfg_.tile_width_count = width_count;
+    memcpy(cfg_.tile_widths, tile_config_param_.tile_widths,
+           sizeof(tile_config_param_.tile_widths[0]) * width_count);
+
+    const unsigned int height_count = tile_config_param_.tile_height_count;
+    cfg_.tile_height_count = height_count;
+    memcpy(cfg_.tile_heights, tile_config_param_.tile_heights,
+           sizeof(tile_config_param_.tile_heights[0]) * height_count);
+  }
+
+  bool DoDecode() const override { return true; }
+
+  // Applies the speed, alt-ref and superblock controls once, on the first
+  // frame.
+  void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                          ::libaom_test::Encoder *encoder) override {
+    if (video->frame() != 0) return;
+
+    encoder->Control(AOME_SET_CPUUSED, 5);
+    encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+    // Any sb_size other than 64 selects the 128x128 superblock.
+    const auto superblock = (tile_config_param_.sb_size == 64)
+                                ? AOM_SUPERBLOCK_SIZE_64X64
+                                : AOM_SUPERBLOCK_SIZE_128X128;
+    encoder->Control(AV1E_SET_SUPERBLOCK_SIZE, superblock);
+  }
+
+  // After a successful decode, walks the tile widths and heights obtained
+  // through AOMD_GET_TILE_INFO and latches tile_config_violated_ when they
+  // disagree with the configured lists. The configured lists are cycled when
+  // there are more tiles than list entries.
+  bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                          libaom_test::Decoder *decoder) override {
+    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
+    if (res_dec != AOM_CODEC_OK) return false;
+
+    aom_codec_ctx_t *const dec_ctx = decoder->GetDecoder();
+    aom_tile_info tile_info;
+    AOM_CODEC_CONTROL_TYPECHECKED(dec_ctx, AOMD_GET_TILE_INFO, &tile_info);
+
+    // check validity of tile cols
+    const int width_count =
+        static_cast<int>(tile_config_param_.tile_width_count);
+    int cfg_col = 0;
+    int col = 0;
+    while (col < tile_info.tile_columns - 1) {
+      if (tile_config_param_.tile_widths[cfg_col] !=
+          tile_info.tile_widths[col]) {
+        tile_config_violated_ = true;
+      }
+      cfg_col = (cfg_col + 1) % width_count;
+      ++col;
+    }
+    // last column may not be able to accommodate config, but if it is
+    // greater than what is configured, there is a violation.
+    if (tile_config_param_.tile_widths[cfg_col] < tile_info.tile_widths[col]) {
+      tile_config_violated_ = true;
+    }
+
+    // check validity of tile rows
+    const int height_count =
+        static_cast<int>(tile_config_param_.tile_height_count);
+    int cfg_row = 0;
+    int row = 0;
+    while (row < tile_info.tile_rows - 1) {
+      if (tile_config_param_.tile_heights[cfg_row] !=
+          tile_info.tile_heights[row]) {
+        tile_config_violated_ = true;
+      }
+      cfg_row = (cfg_row + 1) % height_count;
+      ++row;
+    }
+    // last row may not be able to accommodate config, but if it is
+    // greater than what is configured, there is a violation.
+    if (tile_config_param_.tile_heights[cfg_row] <
+        tile_info.tile_heights[row]) {
+      tile_config_violated_ = true;
+    }
+    return true;
+  }
+
+  ::libaom_test::TestMode encoding_mode_;
+  const nonUniformTileConfigParam tile_config_param_;
+  bool tile_config_violated_;
+  aom_rc_mode rc_end_usage_;
+};
+
+// Uniform spacing on the niklas_1280_720_30.y4m clip: the tile limits are
+// recomputed from the frame dimensions divided by the superblock size before
+// encoding.
+TEST_P(UniformTileConfigTestLarge, UniformTileConfigTest) {
+  ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 1);
+  ASSERT_NO_FATAL_FAILURE(video.Begin());
+
+  const int sb_size = static_cast<int>(tile_config_param_.sb_size);
+  const int max_tiles_cols = video.img()->w / sb_size;
+  const int max_tiles_rows = video.img()->h / sb_size;
+  max_tile_cols_log2_ = tile_log2(1, AOMMIN(max_tiles_cols, AOM_MAX_TILE_COLS));
+  max_tile_rows_log2_ = tile_log2(1, AOMMIN(max_tiles_rows, AOM_MAX_TILE_ROWS));
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(tile_config_violated_, false);
+}
+
+// Same check as above on the screendata.y4m clip.
+TEST_P(UniformTileConfigTestLarge, UniformTileConfigTestLowRes) {
+  ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 1);
+  ASSERT_NO_FATAL_FAILURE(video.Begin());
+
+  const int sb_size = static_cast<int>(tile_config_param_.sb_size);
+  const int max_tiles_cols = video.img()->w / sb_size;
+  const int max_tiles_rows = video.img()->h / sb_size;
+  max_tile_cols_log2_ = tile_log2(1, AOMMIN(max_tiles_cols, AOM_MAX_TILE_COLS));
+  max_tile_rows_log2_ = tile_log2(1, AOMMIN(max_tiles_rows, AOM_MAX_TILE_ROWS));
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(tile_config_violated_, false);
+}
+
+// Non-uniform spacing on the niklas_1280_720_30.y4m clip.
+TEST_P(NonUniformTileConfigTestLarge, NonUniformTileConfigTest) {
+  ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 1);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(tile_config_violated_, false);
+}
+
+AV1_INSTANTIATE_TEST_SUITE(UniformTileConfigTestLarge,
+                           ::testing::ValuesIn(kTestModeParams),
+                           ::testing::ValuesIn(uniformTileConfigParams),
+                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ));
+
+AV1_INSTANTIATE_TEST_SUITE(NonUniformTileConfigTestLarge,
::testing::ValuesIn(kTestModeParams), + ::testing::ValuesIn(nonUniformTileConfigParams), + ::testing::Values(AOM_Q, AOM_VBR, AOM_CBR, AOM_CQ)); + +typedef struct { + // Number of tile groups to set. + const int num_tg; + // Number of tile rows to set + const int num_tile_rows; + // Number of tile columns to set + const int num_tile_cols; +} TileGroupConfigParams; + +static const TileGroupConfigParams tileGroupTestParams[] = { + { 5, 4, 4 }, { 3, 3, 3 }, { 5, 3, 3 }, { 7, 5, 5 }, { 7, 3, 3 }, { 7, 4, 4 } +}; + +std::ostream &operator<<(std::ostream &os, + const TileGroupConfigParams &test_arg) { + return os << "TileGroupConfigParams { num_tg:" << test_arg.num_tg + << " num_tile_rows:" << test_arg.num_tile_rows + << " num_tile_cols:" << test_arg.num_tile_cols << " }"; +} + +// This class is used to test number of tile groups present in header. +class TileGroupTestLarge + : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, + TileGroupConfigParams>, + public ::libaom_test::EncoderTest { + protected: + TileGroupTestLarge() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + tile_group_config_params_(GET_PARAM(2)) { + tile_group_config_violated_ = false; + } + ~TileGroupTestLarge() override = default; + + void SetUp() override { + InitializeConfig(encoding_mode_); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cfg_.rc_end_usage = AOM_Q; + cfg_.g_threads = 1; + } + + bool DoDecode() const override { return true; } + + void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 5); + encoder->Control(AV1E_SET_NUM_TG, tile_group_config_params_.num_tg); + encoder->Control(AV1E_SET_TILE_COLUMNS, + tile_group_config_params_.num_tile_cols); + encoder->Control(AV1E_SET_TILE_ROWS, + tile_group_config_params_.num_tile_rows); + } + } + + bool HandleDecodeResult(const aom_codec_err_t res_dec, + libaom_test::Decoder 
*decoder) override { + EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); + if (AOM_CODEC_OK == res_dec) { + aom_tile_info tile_info; + aom_codec_ctx_t *ctx_dec = decoder->GetDecoder(); + AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_TILE_INFO, &tile_info); + AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_SHOW_EXISTING_FRAME_FLAG, + &show_existing_frame_); + if (tile_info.num_tile_groups != tile_group_config_params_.num_tg && + !show_existing_frame_) + tile_group_config_violated_ = true; + EXPECT_EQ(tile_group_config_violated_, false); + } + return AOM_CODEC_OK == res_dec; + } + + int show_existing_frame_; + bool tile_group_config_violated_; + aom_rc_mode end_usage_check_; + ::libaom_test::TestMode encoding_mode_; + const TileGroupConfigParams tile_group_config_params_; +}; + +TEST_P(TileGroupTestLarge, TileGroupCountTest) { + libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, + cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, 5); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +AV1_INSTANTIATE_TEST_SUITE(TileGroupTestLarge, + ::testing::ValuesIn(kTestModeParams), + ::testing::ValuesIn(tileGroupTestParams)); +} // namespace diff --git a/third_party/aom/test/tile_independence_test.cc b/third_party/aom/test/tile_independence_test.cc new file mode 100644 index 0000000000..84406dd3fb --- /dev/null +++ b/third_party/aom/test/tile_independence_test.cc @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <cstdio> +#include <cstdlib> +#include <string> +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/md5_helper.h" +#include "aom_mem/aom_mem.h" + +namespace { +class TileIndependenceTest + : public ::libaom_test::CodecTestWith3Params<int, int, int>, + public ::libaom_test::EncoderTest { + protected: + TileIndependenceTest() + : EncoderTest(GET_PARAM(0)), md5_fw_order_(), md5_inv_order_(), + n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)), + n_tile_groups_(GET_PARAM(3)) { + init_flags_ = AOM_CODEC_USE_PSNR; + aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t(); + cfg.w = 704; + cfg.h = 576; + cfg.threads = 1; + cfg.allow_lowbitdepth = 1; + fw_dec_ = codec_->CreateDecoder(cfg, 0); + inv_dec_ = codec_->CreateDecoder(cfg, 0); + inv_dec_->Control(AV1_INVERT_TILE_DECODE_ORDER, 1); + + if (fw_dec_->IsAV1() && inv_dec_->IsAV1()) { + fw_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1); + fw_dec_->Control(AV1_SET_DECODE_TILE_COL, -1); + inv_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1); + inv_dec_->Control(AV1_SET_DECODE_TILE_COL, -1); + } + } + + ~TileIndependenceTest() override { + delete fw_dec_; + delete inv_dec_; + } + + void SetUp() override { InitializeConfig(libaom_test::kTwoPassGood); } + + void PreEncodeFrameHook(libaom_test::VideoSource *video, + libaom_test::Encoder *encoder) override { + if (video->frame() == 0) { + encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_); + encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_); + SetCpuUsed(encoder); + } else if (video->frame() == 3) { + encoder->Control(AV1E_SET_NUM_TG, n_tile_groups_); + } + } + + virtual void SetCpuUsed(libaom_test::Encoder *encoder) { + static const int kCpuUsed = 3; + encoder->Control(AOME_SET_CPUUSED, kCpuUsed); + } + + void UpdateMD5(::libaom_test::Decoder *dec, const aom_codec_cx_pkt_t *pkt, + ::libaom_test::MD5 
*md5) { + const aom_codec_err_t res = dec->DecodeFrame( + reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz); + if (res != AOM_CODEC_OK) { + abort_ = true; + ASSERT_EQ(AOM_CODEC_OK, res); + } + const aom_image_t *img = dec->GetDxData().Next(); + md5->Add(img); + } + + void FramePktHook(const aom_codec_cx_pkt_t *pkt) override { + UpdateMD5(fw_dec_, pkt, &md5_fw_order_); + UpdateMD5(inv_dec_, pkt, &md5_inv_order_); + } + + void DoTest() { + const aom_rational timebase = { 33333333, 1000000000 }; + cfg_.g_timebase = timebase; + cfg_.rc_target_bitrate = 500; + cfg_.g_lag_in_frames = 12; + cfg_.rc_end_usage = AOM_VBR; + + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 576, + timebase.den, timebase.num, 0, 5); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + const char *md5_fw_str = md5_fw_order_.Get(); + const char *md5_inv_str = md5_inv_order_.Get(); + ASSERT_STREQ(md5_fw_str, md5_inv_str); + } + + ::libaom_test::MD5 md5_fw_order_, md5_inv_order_; + ::libaom_test::Decoder *fw_dec_, *inv_dec_; + + private: + int n_tile_cols_; + int n_tile_rows_; + int n_tile_groups_; +}; + +// run an encode with 2 or 4 tiles, and do the decode both in normal and +// inverted tile ordering. Ensure that the MD5 of the output in both cases +// is identical. If so, tiles are considered independent and the test passes. 
+TEST_P(TileIndependenceTest, MD5Match) { + cfg_.large_scale_tile = 0; + fw_dec_->Control(AV1_SET_TILE_MODE, 0); + inv_dec_->Control(AV1_SET_TILE_MODE, 0); + DoTest(); +} + +class TileIndependenceTestLarge : public TileIndependenceTest { + void SetCpuUsed(libaom_test::Encoder *encoder) override { + static const int kCpuUsed = 0; + encoder->Control(AOME_SET_CPUUSED, kCpuUsed); + } +}; + +TEST_P(TileIndependenceTestLarge, MD5Match) { + cfg_.large_scale_tile = 0; + fw_dec_->Control(AV1_SET_TILE_MODE, 0); + inv_dec_->Control(AV1_SET_TILE_MODE, 0); + DoTest(); +} + +AV1_INSTANTIATE_TEST_SUITE(TileIndependenceTest, ::testing::Values(0, 1), + ::testing::Values(0, 1), ::testing::Values(1, 2, 4)); +AV1_INSTANTIATE_TEST_SUITE(TileIndependenceTestLarge, ::testing::Values(0, 1), + ::testing::Values(0, 1), ::testing::Values(1, 2, 4)); + +class TileIndependenceLSTest : public TileIndependenceTest {}; + +TEST_P(TileIndependenceLSTest, MD5Match) { + cfg_.large_scale_tile = 1; + fw_dec_->Control(AV1_SET_TILE_MODE, 1); + fw_dec_->Control(AV1D_EXT_TILE_DEBUG, 1); + inv_dec_->Control(AV1_SET_TILE_MODE, 1); + inv_dec_->Control(AV1D_EXT_TILE_DEBUG, 1); + DoTest(); +} + +class TileIndependenceLSTestLarge : public TileIndependenceTestLarge {}; + +TEST_P(TileIndependenceLSTestLarge, MD5Match) { + cfg_.large_scale_tile = 1; + fw_dec_->Control(AV1_SET_TILE_MODE, 1); + fw_dec_->Control(AV1D_EXT_TILE_DEBUG, 1); + inv_dec_->Control(AV1_SET_TILE_MODE, 1); + inv_dec_->Control(AV1D_EXT_TILE_DEBUG, 1); + DoTest(); +} + +AV1_INSTANTIATE_TEST_SUITE(TileIndependenceLSTest, ::testing::Values(6), + ::testing::Values(6), ::testing::Values(1)); +AV1_INSTANTIATE_TEST_SUITE(TileIndependenceLSTestLarge, ::testing::Values(6), + ::testing::Values(6), ::testing::Values(1)); +} // namespace diff --git a/third_party/aom/test/time_stamp_test.cc b/third_party/aom/test/time_stamp_test.cc new file mode 100644 index 0000000000..5de98b719e --- /dev/null +++ b/third_party/aom/test/time_stamp_test.cc @@ -0,0 +1,107 @@ 
+/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Test AOM timestamp handling + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/video_source.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +const int kVideoSourceWidth = 320; +const int kVideoSourceHeight = 240; +const int kFramesToEncode = 3; + +// A video source that exposes functions to set the timebase, framerate and +// starting pts. +class DummyTimebaseVideoSource : public ::libaom_test::DummyVideoSource { + public: + // Parameters num and den set the timebase for the video source. + DummyTimebaseVideoSource(int num, int den) + : framerate_numerator_(30), framerate_denominator_(1), starting_pts_(0) { + SetSize(kVideoSourceWidth, kVideoSourceHeight); + set_limit(kFramesToEncode); + timebase_.num = num; + timebase_.den = den; + } + + void SetFramerate(int numerator, int denominator) { + framerate_numerator_ = numerator; + framerate_denominator_ = denominator; + } + + // Returns one frames duration in timebase units as a double. 
+ double FrameDuration() const { + return (static_cast<double>(timebase_.den) / timebase_.num) / + (static_cast<double>(framerate_numerator_) / framerate_denominator_); + } + + aom_codec_pts_t pts() const override { + return static_cast<aom_codec_pts_t>(frame_ * FrameDuration() + + starting_pts_ + 0.5); + } + + unsigned long duration() const override { + return static_cast<unsigned long>(FrameDuration() + 0.5); + } + + aom_rational_t timebase() const override { return timebase_; } + + void set_starting_pts(int64_t starting_pts) { starting_pts_ = starting_pts; } + + private: + aom_rational_t timebase_; + int framerate_numerator_; + int framerate_denominator_; + int64_t starting_pts_; +}; + +class TimestampTest + : public ::libaom_test::EncoderTest, + public ::libaom_test::CodecTestWithParam<libaom_test::TestMode> { + protected: + TimestampTest() : EncoderTest(GET_PARAM(0)) {} + ~TimestampTest() override = default; + + void SetUp() override { InitializeConfig(GET_PARAM(1)); } +}; + +// Tests encoding in millisecond timebase. +TEST_P(TimestampTest, EncodeFrames) { + DummyTimebaseVideoSource video(1, 1000); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +TEST_P(TimestampTest, TestMicrosecondTimebase) { + // Set the timebase to microseconds. 
+ DummyTimebaseVideoSource video(1, 1000000); + video.set_limit(1); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +TEST_P(TimestampTest, TestAv1Rollover) { + DummyTimebaseVideoSource video(1, 1000); + video.set_starting_pts(922337170351ll); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} +#if CONFIG_REALTIME_ONLY +AV1_INSTANTIATE_TEST_SUITE(TimestampTest, + ::testing::Values(::libaom_test::kRealTime)); +#else +AV1_INSTANTIATE_TEST_SUITE(TimestampTest, + ::testing::Values(::libaom_test::kRealTime, + ::libaom_test::kTwoPassGood)); +#endif + +} // namespace diff --git a/third_party/aom/test/tools_common.sh b/third_party/aom/test/tools_common.sh new file mode 100755 index 0000000000..cb9eba1727 --- /dev/null +++ b/third_party/aom/test/tools_common.sh @@ -0,0 +1,520 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file contains shell code shared by test scripts for libaom tools. + +# Use $AOM_TEST_TOOLS_COMMON_SH as a pseudo include guard. +if [ -z "${AOM_TEST_TOOLS_COMMON_SH}" ]; then +AOM_TEST_TOOLS_COMMON_SH=included + +set -e +devnull='> /dev/null 2>&1' +AOM_TEST_PREFIX="" + +elog() { + echo "$@" 1>&2 +} + +vlog() { + if [ "${AOM_TEST_VERBOSE_OUTPUT}" = "yes" ]; then + echo "$@" + fi +} + +# Sets $AOM_TOOL_TEST to the name specified by positional parameter one. +test_begin() { + AOM_TOOL_TEST="${1}" +} + +# Clears the AOM_TOOL_TEST variable after confirming that $AOM_TOOL_TEST matches +# positional parameter one. 
test_end() {
  # $1 must name the test currently stored in $AOM_TOOL_TEST. On a match the
  # marker is reset to '<unset>', which is the value cleanup() treats as
  # "no test in flight"; on a mismatch the function reports and returns 1.
  if [ "$1" != "${AOM_TOOL_TEST}" ]; then
    echo "FAIL completed test mismatch!."
    echo " completed test: ${1}"
    echo " active test: ${AOM_TOOL_TEST}."
    return 1
  fi
  AOM_TOOL_TEST='<unset>'
}

# Echoes the target configuration being tested.
test_configuration_target() {
  aom_config_c="${LIBAOM_CONFIG_PATH}/config/aom_config.c"
  # Clean up the cfg pointer line from aom_config.c for easier re-use by
  # someone examining a failure in the example tests.
  # 1. Use awk to find the first line of aom_config.c that mentions cfg.
  # 2. Split the line using ' = ' as separator and keep the last field.
  # 3. Abuse sed to consume the leading " and trailing "; from the assignment
  #    to the cfg pointer.
  cmake_config=$(awk -F ' = ' '/cfg/ { print $NF; exit }' "${aom_config_c}" \
    | sed -e s/\"// -e s/\"\;//)
  # ${cmake_config} is deliberately unquoted: it is echoed as a flat list of
  # cmake arguments.
  echo cmake generated via command: cmake path/to/aom ${cmake_config}
}

# Trap function used for failure reports and tool output directory removal.
# When the contents of $AOM_TOOL_TEST do not match the string '<unset>', reports
# failure of test stored in $AOM_TOOL_TEST.
cleanup() {
  if [ -n "${AOM_TOOL_TEST}" ] && [ "${AOM_TOOL_TEST}" != '<unset>' ]; then
    echo "FAIL: $AOM_TOOL_TEST"
  fi
  # Keep the output directory when the user asked for it.
  if [ "${AOM_TEST_PRESERVE_OUTPUT}" = "yes" ]; then
    return
  fi
  if [ -n "${AOM_TEST_OUTPUT_DIR}" ] && [ -d "${AOM_TEST_OUTPUT_DIR}" ]; then
    rm -rf "${AOM_TEST_OUTPUT_DIR}"
  fi
}

# Echoes the version string assigned to the VERSION_STRING_NOSP variable defined
# in $LIBAOM_CONFIG_PATH/config/aom_version.h to stdout.
cmake_version() {
  aom_version_h="${LIBAOM_CONFIG_PATH}/config/aom_version.h"

  # Find VERSION_STRING_NOSP line, split it with '"' and print the next to last
  # field to output the version string to stdout.
  aom_version=$(awk -F \" '/VERSION_STRING_NOSP/ {print $(NF-1)}' \
    "${aom_version_h}")
  echo "v${aom_version}"
}

# Echoes current git version as reported by running 'git describe', or the
# version used by the cmake build when git is unavailable.
source_version() {
  if git --version > /dev/null 2>&1; then
    # Run in a subshell so the caller's working directory is left alone.
    (cd "$(dirname "${0}")"
    git describe)
  else
    cmake_version
  fi
}

# Echoes warnings to stdout when source version and CMake build generated
# version are out of sync.
check_version_strings() {
  cmake_version=$(cmake_version)
  source_version=$(source_version)

  if [ "${cmake_version}" != "${source_version}" ]; then
    echo "Warning: version has changed since last cmake run."
    vlog " cmake version: ${cmake_version} version now: ${source_version}"
  fi
}

# $1 is the name of an environment variable containing a directory name to
# test. Returns 1, after logging through elog, when the value is not a
# directory.
test_env_var_dir() {
  local dir
  # Indirect expansion of the variable named by $1. The assignment happens
  # inside eval (rather than capturing the output of `eval echo`) so the value
  # is neither field-split nor glob-expanded before the -d test.
  eval "dir=\"\${$1}\""
  if [ ! -d "${dir}" ]; then
    elog "'${dir}': No such directory"
    elog "The $1 environment variable must be set to a valid directory."
    return 1
  fi
}

# This script requires that the LIBAOM_BIN_PATH, LIBAOM_CONFIG_PATH, and
# LIBAOM_TEST_DATA_PATH variables are in the environment: Confirm that
# the variables are set and that they all evaluate to directory paths.
verify_aom_test_environment() {
  test_env_var_dir "LIBAOM_BIN_PATH" \
    && test_env_var_dir "LIBAOM_CONFIG_PATH" \
    && test_env_var_dir "LIBAOM_TEST_DATA_PATH"
}

# Greps aom_config.h in LIBAOM_CONFIG_PATH for positional parameter one, which
# should be a LIBAOM preprocessor flag. Echoes yes to stdout when the feature
# is available.
aom_config_option_enabled() {
  aom_config_option="${1}"
  aom_config_file="${LIBAOM_CONFIG_PATH}/config/aom_config.h"
  config_line=$(grep "${aom_config_option}" "${aom_config_file}")
  # A matching line that ends in 1 means the option is enabled. `grep -E`
  # replaces the obsolescent `egrep` spelling.
  if echo "${config_line}" | grep -E -q '1$'; then
    echo yes
  fi
}

# Echoes yes when output of test_configuration_target() contains win32 or win64.
is_windows_target() {
  # grep -q already suppresses matches on stdout; only its stderr needs to be
  # silenced.
  if test_configuration_target \
     | grep -q -e win32 -e win64 2> /dev/null; then
    echo yes
  fi
}

# Echoes the path to $1 and returns 0 when an executable regular file of that
# name (plus $AOM_TEST_EXE_SUFFIX) exists in one of the candidate directories
# derived from $LIBAOM_BIN_PATH: the directory itself, its parent, its tools/
# subdirectory, or the parent's tools/ subdirectory. Echoes nothing and
# returns 1 otherwise. Caller is responsible for testing the string once the
# function returns.
aom_tool_path() {
  local tool_name="$1"
  local root_path="${LIBAOM_BIN_PATH}"
  local suffix="${AOM_TEST_EXE_SUFFIX}"
  local tool_path

  # Each candidate is its own quoted word so a bin path containing whitespace
  # is not split into several bogus candidates.
  for tool_path in \
      "${root_path}/${tool_name}${suffix}" \
      "${root_path}/../${tool_name}${suffix}" \
      "${root_path}/tools/${tool_name}${suffix}" \
      "${root_path}/../tools/${tool_name}${suffix}"; do
    if [ -x "${tool_path}" ] && [ -f "${tool_path}" ]; then
      echo "${tool_path}"
      return 0
    fi
  done

  return 1
}

# Echoes yes to stdout when the file named by positional parameter one exists
# in LIBAOM_BIN_PATH, and is executable.
aom_tool_available() {
  local tool_name="$1"
  local tool="${LIBAOM_BIN_PATH}/${tool_name}${AOM_TEST_EXE_SUFFIX}"
  [ -x "${tool}" ] && echo yes
}

# Echoes yes to stdout when aom_config_option_enabled() reports yes for
# CONFIG_AV1_DECODER.
av1_decode_available() {
  [ "$(aom_config_option_enabled CONFIG_AV1_DECODER)" = "yes" ] && echo yes
}

# Echoes yes to stdout when aom_config_option_enabled() reports yes for
# CONFIG_AV1_ENCODER.
av1_encode_available() {
  [ "$(aom_config_option_enabled CONFIG_AV1_ENCODER)" = "yes" ] && echo yes
}

# Echoes "fast" encode params for use with aomenc.
aomenc_encode_test_fast_params() {
  echo "--cpu-used=2
        --limit=${AV1_ENCODE_TEST_FRAME_LIMIT}
        --lag-in-frames=0
        --test-decode=fatal"
}

# Echoes realtime encode params for use with aomenc.
aomenc_encode_test_rt_params() {
  # The list is echoed as one whitespace-separated string. Note that
  # --enable-order-hint=0 appears twice in it.
  echo "--limit=${AV1_ENCODE_TEST_FRAME_LIMIT}
        --test-decode=fatal
        --enable-tpl-model=0
        --deltaq-mode=0
        --enable-order-hint=0
        --profile=0
        --static-thresh=0
        --end-usage=cbr
        --cpu-used=7
        --passes=1
        --usage=1
        --lag-in-frames=0
        --aq-mode=3
        --enable-obmc=0
        --enable-warped-motion=0
        --enable-ref-frame-mvs=0
        --enable-cdef=1
        --enable-order-hint=0
        --coeff-cost-upd-freq=3
        --mode-cost-upd-freq=3
        --mv-cost-upd-freq=3"
}

# Echoes yes to stdout when aom_config_option_enabled() reports yes for
# CONFIG_AV1_HIGHBITDEPTH.
highbitdepth_available() {
  [ "$(aom_config_option_enabled CONFIG_AV1_HIGHBITDEPTH)" = "yes" ] && echo yes
}

# Echoes yes to stdout when aom_config_option_enabled() reports yes for
# CONFIG_WEBM_IO.
webm_io_available() {
  [ "$(aom_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes
}

# Echoes yes to stdout when aom_config_option_enabled() reports yes for
# CONFIG_REALTIME_ONLY.
realtime_only_build() {
  [ "$(aom_config_option_enabled CONFIG_REALTIME_ONLY)" = "yes" ] && echo yes
}

# Filters strings from $1 using the filter specified by $2. Filter behavior
# depends on the presence of $3. When $3 is present, strings that match the
# filter are excluded. When $3 is omitted, strings matching the filter are
# included.
# The filtered result is echoed to stdout. When a filter is given, every kept
# string is preceded by a single space; when $2 is empty, $1 is echoed as is.
filter_strings() {
  strings=${1}
  filter=${2}
  exclude=${3}
  # Start from an empty result so a value left behind by an earlier call in
  # the same shell is not prepended to this one.
  filtered_strings=""

  if [ -n "${exclude}" ]; then
    # When positional parameter three exists the caller wants to remove strings.
    # Tell grep to invert matches using the -v argument.
    exclude='-v'
  else
    unset exclude
  fi

  if [ -n "${filter}" ]; then
    for s in ${strings}; do
      # ${exclude} is intentionally unquoted: it expands to -v or to nothing.
      if echo "${s}" | grep -E -q ${exclude} "${filter}" > /dev/null 2>&1; then
        filtered_strings="${filtered_strings} ${s}"
      fi
    done
  else
    filtered_strings="${strings}"
  fi
  echo "${filtered_strings}"
}

# Runs user test functions passed via positional parameters one and two.
# Functions in positional parameter one are treated as environment verification
# functions and are run unconditionally. Functions in positional parameter two
# are run according to the rules specified in aom_test_usage().
run_tests() {
  local env_tests="verify_aom_test_environment $1"
  local tests_to_filter="$2"
  local test_name="${AOM_TEST_NAME}"

  if [ -z "${test_name}" ]; then
    test_name="$(basename "${0%.*}")"
  fi

  if [ "${AOM_TEST_RUN_DISABLED_TESTS}" != "yes" ]; then
    # Filter out DISABLED tests.
    tests_to_filter=$(filter_strings "${tests_to_filter}" ^DISABLED exclude)
  fi

  if [ -n "${AOM_TEST_FILTER}" ]; then
    # Remove tests not matching the user's filter. The filter is quoted so a
    # pattern containing whitespace stays one argument instead of spilling
    # into filter_strings' "exclude" parameter.
    tests_to_filter=$(filter_strings "${tests_to_filter}" "${AOM_TEST_FILTER}")
  fi

  # User requested test listing: Dump test names and return.
  if [ "${AOM_TEST_LIST_TESTS}" = "yes" ]; then
    for test_name in $tests_to_filter; do
      echo ${test_name}
    done
    return
  fi

  # Don't bother with the environment tests if everything else was disabled.
  [ -z "${tests_to_filter}" ] && return

  # Combine environment and actual tests.
  local tests_to_run="${env_tests} ${tests_to_filter}"

  # av1_c_vs_simd_encode is a standalone test, and it doesn't need to check the
  # version string.
  if [ "${test_name}" != "av1_c_vs_simd_encode" ]; then
    check_version_strings
  fi

  # Run tests.
  for test in ${tests_to_run}; do
    test_begin "${test}"
    vlog " RUN ${test}"
    "${test}"
    vlog " PASS ${test}"
    test_end "${test}"
  done

  local tested_config="$(test_configuration_target) @ $(source_version)"
  echo "${test_name}: Done, all tests pass for ${tested_config}."
}

# Prints the command line help text to stdout.
aom_test_usage() {
cat << EOF
  Usage: ${0##*/} [arguments]
    --bin-path <path to libaom binaries directory>
    --config-path <path to libaom config directory>
    --filter <filter>: User test filter. Only tests matching filter are run.
    --run-disabled-tests: Run disabled tests.
    --help: Display this message and exit.
    --test-data-path <path to libaom test data directory>
    --show-program-output: Shows output from all programs being tested.
    --prefix: Allows for a user specified prefix to be inserted before all test
              programs. Grants the ability, for example, to run test programs
              within valgrind.
    --list-tests: List all test names and exit without actually running tests.
    --verbose: Verbose output.

    When the --bin-path option is not specified the script attempts to use
    \$LIBAOM_BIN_PATH and then the current directory.

    When the --config-path option is not specified the script attempts to use
    \$LIBAOM_CONFIG_PATH and then the current directory.

    When the --test-data-path option is not specified the script attempts to use
    \$LIBAOM_TEST_DATA_PATH and then the current directory.
EOF
}

# Returns non-zero (failure) when required environment variables are empty
# strings.
aom_test_check_environment() {
  if [ -z "${LIBAOM_BIN_PATH}" ] || \
     [ -z "${LIBAOM_CONFIG_PATH}" ] || \
     [ -z "${LIBAOM_TEST_DATA_PATH}" ]; then
    return 1
  fi
}

# Echo aomenc command line parameters allowing use of a raw yuv file as
# input to aomenc.
yuv_raw_input() {
  # One quoted string: the expansions are no longer left outside the quotes,
  # so the values are echoed exactly as stored.
  echo "${YUV_RAW_INPUT}
       --width=${YUV_RAW_INPUT_WIDTH}
       --height=${YUV_RAW_INPUT_HEIGHT}"
}

# Do a small encode for testing decoders.
encode_yuv_raw_input_av1() {
  # $1 is the output file; any remaining arguments are appended to the aomenc
  # command line. Does nothing (and succeeds) when av1_encode_available() does
  # not report yes. Returns 1 when aomenc cannot be located or when the output
  # file is missing after the encode.
  if [ "$(av1_encode_available)" = "yes" ]; then
    local output="$1"
    local encoder="$(aom_tool_path aomenc)"
    shift
    # With an empty encoder the eval below would try to execute the raw YUV
    # input path as the command, so stop here with a clear message instead.
    if [ -z "${encoder}" ]; then
      elog "aomenc not found."
      return 1
    fi
    eval "${encoder}" $(yuv_raw_input) \
      $(aomenc_encode_test_fast_params) \
      --output="${output}" \
      $@ \
      ${devnull}

    if [ ! -e "${output}" ]; then
      elog "Output file does not exist."
      return 1
    fi
  fi
}

# Parse the command line.
while [ -n "$1" ]; do
  # Options that consume a value need a second argument. Without this check
  # the trailing `shift` below runs on an empty argument list when such an
  # option is given last.
  case "$1" in
    --bin-path | --config-path | --filter | --test-data-path | --prefix)
      if [ "$#" -lt 2 ]; then
        aom_test_usage
        exit 1
      fi
      ;;
  esac
  case "$1" in
    --bin-path)
      LIBAOM_BIN_PATH="$2"
      shift
      ;;
    --config-path)
      LIBAOM_CONFIG_PATH="$2"
      shift
      ;;
    --filter)
      AOM_TEST_FILTER="$2"
      shift
      ;;
    --run-disabled-tests)
      AOM_TEST_RUN_DISABLED_TESTS=yes
      ;;
    --help)
      aom_test_usage
      exit
      ;;
    --test-data-path)
      LIBAOM_TEST_DATA_PATH="$2"
      shift
      ;;
    --prefix)
      AOM_TEST_PREFIX="$2"
      shift
      ;;
    --verbose)
      AOM_TEST_VERBOSE_OUTPUT=yes
      ;;
    --show-program-output)
      # Clearing devnull stops tool output from being redirected away.
      devnull=
      ;;
    --list-tests)
      AOM_TEST_LIST_TESTS=yes
      ;;
    *)
      aom_test_usage
      exit 1
      ;;
  esac
  shift
done

# Handle running the tests from a build directory without arguments when running
# the tests on *nix/macosx.
LIBAOM_BIN_PATH="${LIBAOM_BIN_PATH:-.}"
LIBAOM_CONFIG_PATH="${LIBAOM_CONFIG_PATH:-.}"
LIBAOM_TEST_DATA_PATH="${LIBAOM_TEST_DATA_PATH:-.}"

# Create a temporary directory for output files, and a trap to clean it up.
# The root is $TMPDIR, else $TEMPDIR, else /tmp.
if [ -n "${TMPDIR}" ]; then
  AOM_TEST_TEMP_ROOT="${TMPDIR}"
elif [ -n "${TEMPDIR}" ]; then
  AOM_TEST_TEMP_ROOT="${TEMPDIR}"
else
  AOM_TEST_TEMP_ROOT=/tmp
fi

# A caller-provided AOM_TEST_OUTPUT_DIR wins; otherwise use a per-process
# directory under the temp root.
AOM_TEST_OUTPUT_DIR="${AOM_TEST_OUTPUT_DIR:-${AOM_TEST_TEMP_ROOT}/aom_test_$$}"

if ! mkdir -p "${AOM_TEST_OUTPUT_DIR}" || \
   [ ! -d "${AOM_TEST_OUTPUT_DIR}" ]; then
  echo "${0##*/}: Cannot create output directory, giving up."
  echo "${0##*/}: AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}"
  exit 1
fi

AOM_TEST_PRESERVE_OUTPUT=${AOM_TEST_PRESERVE_OUTPUT:-no}

# This checking requires config/aom_config.c that is available in Jenkins
# testing.
+if [ "$(is_windows_target)" = "yes" ]; then + AOM_TEST_EXE_SUFFIX=".exe" +fi + +# Variables shared by tests. +AV1_ENCODE_CPU_USED=${AV1_ENCODE_CPU_USED:-1} +AV1_ENCODE_TEST_FRAME_LIMIT=${AV1_ENCODE_TEST_FRAME_LIMIT:-5} +AV1_IVF_FILE="${AV1_IVF_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.ivf}" +AV1_OBU_ANNEXB_FILE="${AV1_OBU_ANNEXB_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.annexb.obu}" +AV1_OBU_SEC5_FILE="${AV1_OBU_SEC5_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.section5.obu}" +AV1_WEBM_FILE="${AV1_WEBM_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.webm}" + +YUV_RAW_INPUT="${LIBAOM_TEST_DATA_PATH}/hantro_collage_w352h288.yuv" +YUV_RAW_INPUT_WIDTH=352 +YUV_RAW_INPUT_HEIGHT=288 + +Y4M_NOSQ_PAR_INPUT="${LIBAOM_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m" +Y4M_720P_INPUT="${LIBAOM_TEST_DATA_PATH}/niklas_1280_720_30.y4m" + +# Setup a trap function to clean up after tests complete. +trap cleanup EXIT + +vlog "$(basename "${0%.*}") test configuration: + LIBAOM_BIN_PATH=${LIBAOM_BIN_PATH} + LIBAOM_CONFIG_PATH=${LIBAOM_CONFIG_PATH} + LIBAOM_TEST_DATA_PATH=${LIBAOM_TEST_DATA_PATH} + AOM_TEST_EXE_SUFFIX=${AOM_TEST_EXE_SUFFIX} + AOM_TEST_FILTER=${AOM_TEST_FILTER} + AOM_TEST_LIST_TESTS=${AOM_TEST_LIST_TESTS} + AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR} + AOM_TEST_PREFIX=${AOM_TEST_PREFIX} + AOM_TEST_PRESERVE_OUTPUT=${AOM_TEST_PRESERVE_OUTPUT} + AOM_TEST_RUN_DISABLED_TESTS=${AOM_TEST_RUN_DISABLED_TESTS} + AOM_TEST_SHOW_PROGRAM_OUTPUT=${AOM_TEST_SHOW_PROGRAM_OUTPUT} + AOM_TEST_TEMP_ROOT=${AOM_TEST_TEMP_ROOT} + AOM_TEST_VERBOSE_OUTPUT=${AOM_TEST_VERBOSE_OUTPUT} + AV1_ENCODE_CPU_USED=${AV1_ENCODE_CPU_USED} + AV1_ENCODE_TEST_FRAME_LIMIT=${AV1_ENCODE_TEST_FRAME_LIMIT} + AV1_IVF_FILE=${AV1_IVF_FILE} + AV1_OBU_ANNEXB_FILE=${AV1_OBU_ANNEXB_FILE} + AV1_OBU_SEC5_FILE=${AV1_OBU_SEC5_FILE} + AV1_WEBM_FILE=${AV1_WEBM_FILE} + YUV_RAW_INPUT=${YUV_RAW_INPUT} + YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH} + YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT} + Y4M_NOSQ_PAR_INPUT=${Y4M_NOSQ_PAR_INPUT}" + +fi # End $AOM_TEST_TOOLS_COMMON_SH pseudo 
include guard. diff --git a/third_party/aom/test/tpl_model_test.cc b/third_party/aom/test/tpl_model_test.cc new file mode 100644 index 0000000000..91eb5e94d3 --- /dev/null +++ b/third_party/aom/test/tpl_model_test.cc @@ -0,0 +1,529 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <cstdlib> +#include <memory> +#include <new> +#include <vector> + +#include "av1/encoder/cost.h" +#include "av1/encoder/tpl_model.h" +#include "av1/encoder/encoder.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +#if CONFIG_BITRATE_ACCURACY +constexpr double epsilon = 0.0000001; +#endif + +double laplace_prob(double q_step, double b, double zero_bin_ratio, + int qcoeff) { + int abs_qcoeff = abs(qcoeff); + double z0 = fmax(exp(-zero_bin_ratio / 2 * q_step / b), TPL_EPSILON); + if (abs_qcoeff == 0) { + double p0 = 1 - z0; + return p0; + } else { + assert(abs_qcoeff > 0); + double z = fmax(exp(-q_step / b), TPL_EPSILON); + double p = z0 / 2 * (1 - z) * pow(z, abs_qcoeff - 1); + return p; + } +} +TEST(TplModelTest, ExponentialEntropyBoundaryTest1) { + double b = 0; + double q_step = 1; + double entropy = av1_exponential_entropy(q_step, b); + EXPECT_NEAR(entropy, 0, 0.00001); +} + +TEST(TplModelTest, TransformCoeffEntropyTest1) { + // Check the consistency between av1_estimate_coeff_entropy() and + // laplace_prob() + double b = 1; + double q_step = 1; + double zero_bin_ratio = 2; + for (int qcoeff = -256; qcoeff < 256; ++qcoeff) { + double rate = 
av1_estimate_coeff_entropy(q_step, b, zero_bin_ratio, qcoeff); + double prob = laplace_prob(q_step, b, zero_bin_ratio, qcoeff); + double ref_rate = -log2(prob); + EXPECT_DOUBLE_EQ(rate, ref_rate); + } +} + +TEST(TplModelTest, TransformCoeffEntropyTest2) { + // Check the consistency between av1_estimate_coeff_entropy(), laplace_prob() + // and av1_laplace_entropy() + double b = 1; + double q_step = 1; + double zero_bin_ratio = 2; + double est_expected_rate = 0; + for (int qcoeff = -20; qcoeff < 20; ++qcoeff) { + double rate = av1_estimate_coeff_entropy(q_step, b, zero_bin_ratio, qcoeff); + double prob = laplace_prob(q_step, b, zero_bin_ratio, qcoeff); + est_expected_rate += prob * rate; + } + double expected_rate = av1_laplace_entropy(q_step, b, zero_bin_ratio); + EXPECT_NEAR(expected_rate, est_expected_rate, 0.001); +} + +TEST(TplModelTest, InitTplStats1) { + // We use heap allocation instead of stack allocation here to avoid + // -Wstack-usage warning. + std::unique_ptr<TplParams> tpl_data(new (std::nothrow) TplParams); + ASSERT_NE(tpl_data, nullptr); + av1_zero(*tpl_data); + tpl_data->ready = 1; + EXPECT_EQ(sizeof(tpl_data->tpl_stats_buffer), + MAX_LENGTH_TPL_FRAME_STATS * sizeof(tpl_data->tpl_stats_buffer[0])); + for (int i = 0; i < MAX_LENGTH_TPL_FRAME_STATS; ++i) { + // Set it to a random non-zero number + tpl_data->tpl_stats_buffer[i].is_valid = i + 1; + } + av1_init_tpl_stats(tpl_data.get()); + EXPECT_EQ(tpl_data->ready, 0); + for (int i = 0; i < MAX_LENGTH_TPL_FRAME_STATS; ++i) { + EXPECT_EQ(tpl_data->tpl_stats_buffer[i].is_valid, 0); + } +} + +TEST(TplModelTest, DeltaRateCostZeroFlow) { + // When srcrf_dist equal to recrf_dist, av1_delta_rate_cost should return 0 + int64_t srcrf_dist = 256; + int64_t recrf_dist = 256; + int64_t delta_rate = 512; + int pixel_num = 256; + int64_t rate_cost = + av1_delta_rate_cost(delta_rate, recrf_dist, srcrf_dist, pixel_num); + EXPECT_EQ(rate_cost, 0); +} + +// a reference function of av1_delta_rate_cost() with delta_rate 
using bit as +// basic unit +double ref_delta_rate_cost(int64_t delta_rate, double src_rec_ratio, + int pixel_count) { + assert(src_rec_ratio <= 1 && src_rec_ratio >= 0); + double bits_per_pixel = (double)delta_rate / pixel_count; + double p = pow(2, bits_per_pixel); + double flow_rate_per_pixel = + sqrt(p * p / (src_rec_ratio * p * p + (1 - src_rec_ratio))); + double rate_cost = pixel_count * log2(flow_rate_per_pixel); + return rate_cost; +} + +TEST(TplModelTest, DeltaRateCostReference) { + const int64_t scale = TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT; + std::vector<int64_t> srcrf_dist_arr = { 256, 257, 312 }; + std::vector<int64_t> recrf_dist_arr = { 512, 288, 620 }; + std::vector<int64_t> delta_rate_arr = { 10, 278, 100 }; + for (size_t t = 0; t < srcrf_dist_arr.size(); ++t) { + int64_t srcrf_dist = srcrf_dist_arr[t]; + int64_t recrf_dist = recrf_dist_arr[t]; + int64_t delta_rate = delta_rate_arr[t]; + int64_t scaled_delta_rate = delta_rate << scale; + int pixel_count = 256; + int64_t rate_cost = av1_delta_rate_cost(scaled_delta_rate, recrf_dist, + srcrf_dist, pixel_count); + rate_cost >>= scale; + double src_rec_ratio = (double)srcrf_dist / recrf_dist; + double ref_rate_cost = + ref_delta_rate_cost(delta_rate, src_rec_ratio, pixel_count); + EXPECT_NEAR((double)rate_cost, ref_rate_cost, 1); + } +} + +TEST(TplModelTest, GetOverlapAreaHasOverlap) { + // The block a's area is [10, 17) x [18, 24). + // The block b's area is [8, 15) x [17, 23). + // The overlapping area between block a and block b is [10, 15) x [18, 23). + // Therefore, the size of the area is (15 - 10) * (23 - 18) = 25. + int row_a = 10; + int col_a = 18; + int row_b = 8; + int col_b = 17; + int height = 7; + int width = 6; + int overlap_area = + av1_get_overlap_area(row_a, col_a, row_b, col_b, width, height); + EXPECT_EQ(overlap_area, 25); +} + +TEST(TplModelTest, GetOverlapAreaNoOverlap) { + // The block a's area is [10, 14) x [18, 22). + // The block b's area is [5, 9) x [5, 9). 
+ // There is no overlapping area between block a and block b. + // Therefore, the return value should be zero. + int row_a = 10; + int col_a = 18; + int row_b = 5; + int col_b = 5; + int height = 4; + int width = 4; + int overlap_area = + av1_get_overlap_area(row_a, col_a, row_b, col_b, width, height); + EXPECT_EQ(overlap_area, 0); +} + +TEST(TplModelTest, GetQIndexFromQstepRatio) { + const aom_bit_depth_t bit_depth = AOM_BITS_8; + // When qstep_ratio is 1, the output q_index should be equal to leaf_qindex. + double qstep_ratio = 1.0; + for (int leaf_qindex = 1; leaf_qindex <= 255; ++leaf_qindex) { + const int q_index = + av1_get_q_index_from_qstep_ratio(leaf_qindex, qstep_ratio, bit_depth); + EXPECT_EQ(q_index, leaf_qindex); + } + + // When qstep_ratio is very low, the output q_index should be 0. + qstep_ratio = 0.0001; + for (int leaf_qindex = 1; leaf_qindex <= 255; ++leaf_qindex) { + const int q_index = + av1_get_q_index_from_qstep_ratio(leaf_qindex, qstep_ratio, bit_depth); + EXPECT_EQ(q_index, 0); + } +} + +TEST(TplModelTest, TxfmStatsInitTest) { + TplTxfmStats tpl_txfm_stats; + av1_init_tpl_txfm_stats(&tpl_txfm_stats); + EXPECT_EQ(tpl_txfm_stats.coeff_num, 256); + EXPECT_EQ(tpl_txfm_stats.txfm_block_count, 0); + for (int i = 0; i < tpl_txfm_stats.coeff_num; ++i) { + EXPECT_DOUBLE_EQ(tpl_txfm_stats.abs_coeff_sum[i], 0); + } +} + +#if CONFIG_BITRATE_ACCURACY +TEST(TplModelTest, TxfmStatsAccumulateTest) { + TplTxfmStats sub_stats; + av1_init_tpl_txfm_stats(&sub_stats); + sub_stats.txfm_block_count = 17; + for (int i = 0; i < sub_stats.coeff_num; ++i) { + sub_stats.abs_coeff_sum[i] = i; + } + + TplTxfmStats accumulated_stats; + av1_init_tpl_txfm_stats(&accumulated_stats); + accumulated_stats.txfm_block_count = 13; + for (int i = 0; i < accumulated_stats.coeff_num; ++i) { + accumulated_stats.abs_coeff_sum[i] = 5 * i; + } + + av1_accumulate_tpl_txfm_stats(&sub_stats, &accumulated_stats); + EXPECT_DOUBLE_EQ(accumulated_stats.txfm_block_count, 30); + for (int i = 
0; i < accumulated_stats.coeff_num; ++i) { + EXPECT_DOUBLE_EQ(accumulated_stats.abs_coeff_sum[i], 6 * i); + } +} + +TEST(TplModelTest, TxfmStatsRecordTest) { + TplTxfmStats stats1; + TplTxfmStats stats2; + av1_init_tpl_txfm_stats(&stats1); + av1_init_tpl_txfm_stats(&stats2); + + tran_low_t coeff[256]; + for (int i = 0; i < 256; ++i) { + coeff[i] = i; + } + av1_record_tpl_txfm_block(&stats1, coeff); + EXPECT_EQ(stats1.txfm_block_count, 1); + + // we record the same transform block twice for testing purpose + av1_record_tpl_txfm_block(&stats2, coeff); + av1_record_tpl_txfm_block(&stats2, coeff); + EXPECT_EQ(stats2.txfm_block_count, 2); + + EXPECT_EQ(stats1.coeff_num, 256); + EXPECT_EQ(stats2.coeff_num, 256); + for (int i = 0; i < 256; ++i) { + EXPECT_DOUBLE_EQ(stats2.abs_coeff_sum[i], 2 * stats1.abs_coeff_sum[i]); + } +} +#endif // CONFIG_BITRATE_ACCURACY + +TEST(TplModelTest, ComputeMVDifferenceTest) { + TplDepFrame tpl_frame_small; + tpl_frame_small.is_valid = true; + tpl_frame_small.mi_rows = 4; + tpl_frame_small.mi_cols = 4; + tpl_frame_small.stride = 1; + uint8_t right_shift_small = 1; + int step_small = 1 << right_shift_small; + + // Test values for motion vectors. + int mv_vals_small[4] = { 1, 2, 3, 4 }; + int index = 0; + + // 4x4 blocks means we need to allocate a 4 size array. 
+ // According to av1_tpl_ptr_pos: + // (row >> right_shift) * stride + (col >> right_shift) + // (4 >> 1) * 1 + (4 >> 1) = 4 + TplDepStats stats_buf_small[4]; + tpl_frame_small.tpl_stats_ptr = stats_buf_small; + + for (int row = 0; row < tpl_frame_small.mi_rows; row += step_small) { + for (int col = 0; col < tpl_frame_small.mi_cols; col += step_small) { + TplDepStats tpl_stats; + tpl_stats.ref_frame_index[0] = 0; + int_mv mv; + mv.as_mv.row = mv_vals_small[index]; + mv.as_mv.col = mv_vals_small[index]; + index++; + tpl_stats.mv[0] = mv; + tpl_frame_small.tpl_stats_ptr[av1_tpl_ptr_pos( + row, col, tpl_frame_small.stride, right_shift_small)] = tpl_stats; + } + } + + int_mv result_mv = + av1_compute_mv_difference(&tpl_frame_small, 1, 1, step_small, + tpl_frame_small.stride, right_shift_small); + + // Expect the result to be exactly equal to 1 because this is the difference + // between neighboring motion vectors in this instance. + EXPECT_EQ(result_mv.as_mv.row, 1); + EXPECT_EQ(result_mv.as_mv.col, 1); +} + +TEST(TplModelTest, ComputeMVBitsTest) { + TplDepFrame tpl_frame; + tpl_frame.is_valid = true; + tpl_frame.mi_rows = 16; + tpl_frame.mi_cols = 16; + tpl_frame.stride = 24; + uint8_t right_shift = 2; + int step = 1 << right_shift; + // Test values for motion vectors. + int mv_vals_ordered[16] = { 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + int mv_vals[16] = { 1, 16, 2, 15, 3, 14, 4, 13, 5, 12, 6, 11, 7, 10, 8, 9 }; + int index = 0; + + // 16x16 blocks means we need to allocate a 100 size array. 
+ // According to av1_tpl_ptr_pos: + // (row >> right_shift) * stride + (col >> right_shift) + // (16 >> 2) * 24 + (16 >> 2) = 100 + TplDepStats stats_buf[100]; + tpl_frame.tpl_stats_ptr = stats_buf; + + for (int row = 0; row < tpl_frame.mi_rows; row += step) { + for (int col = 0; col < tpl_frame.mi_cols; col += step) { + TplDepStats tpl_stats; + tpl_stats.ref_frame_index[0] = 0; + int_mv mv; + mv.as_mv.row = mv_vals_ordered[index]; + mv.as_mv.col = mv_vals_ordered[index]; + index++; + tpl_stats.mv[0] = mv; + tpl_frame.tpl_stats_ptr[av1_tpl_ptr_pos(row, col, tpl_frame.stride, + right_shift)] = tpl_stats; + } + } + + double result = av1_tpl_compute_frame_mv_entropy(&tpl_frame, right_shift); + + // Expect the result to be low because the motion vectors are ordered. + // The estimation algorithm takes this into account and reduces the cost. + EXPECT_NEAR(result, 20, 5); + + index = 0; + for (int row = 0; row < tpl_frame.mi_rows; row += step) { + for (int col = 0; col < tpl_frame.mi_cols; col += step) { + TplDepStats tpl_stats; + tpl_stats.ref_frame_index[0] = 0; + int_mv mv; + mv.as_mv.row = mv_vals[index]; + mv.as_mv.col = mv_vals[index]; + index++; + tpl_stats.mv[0] = mv; + tpl_frame.tpl_stats_ptr[av1_tpl_ptr_pos(row, col, tpl_frame.stride, + right_shift)] = tpl_stats; + } + } + + result = av1_tpl_compute_frame_mv_entropy(&tpl_frame, right_shift); + + // Expect the result to be higher because the vectors are not ordered. + // Neighboring vectors will have different values, increasing the cost. 
+ EXPECT_NEAR(result, 70, 5); +} +#if CONFIG_BITRATE_ACCURACY + +TEST(TplModelTest, VbrRcInfoSetGopBitBudget) { + VBR_RATECTRL_INFO vbr_rc_info; + const double total_bit_budget = 2000; + const int show_frame_count = 8; + const int gop_show_frame_count = 4; + av1_vbr_rc_init(&vbr_rc_info, total_bit_budget, show_frame_count); + av1_vbr_rc_set_gop_bit_budget(&vbr_rc_info, gop_show_frame_count); + EXPECT_NEAR(vbr_rc_info.gop_bit_budget, 1000, epsilon); +} + +void init_toy_gf_group(GF_GROUP *gf_group) { + av1_zero(*gf_group); + gf_group->size = 4; + const FRAME_UPDATE_TYPE update_type[4] = { KF_UPDATE, ARF_UPDATE, + INTNL_ARF_UPDATE, LF_UPDATE }; + for (int i = 0; i < gf_group->size; ++i) { + gf_group->update_type[i] = update_type[i]; + } +} + +void init_toy_vbr_rc_info(VBR_RATECTRL_INFO *vbr_rc_info, int gop_size) { + int total_bit_budget = 2000; + int show_frame_count = 8; + av1_vbr_rc_init(vbr_rc_info, total_bit_budget, show_frame_count); + + for (int i = 0; i < gop_size; ++i) { + vbr_rc_info->qstep_ratio_list[i] = 1; + } +} + +void init_toy_tpl_txfm_stats(std::vector<TplTxfmStats> *stats_list) { + for (size_t i = 0; i < stats_list->size(); i++) { + TplTxfmStats *txfm_stats = &stats_list->at(i); + av1_init_tpl_txfm_stats(txfm_stats); + txfm_stats->txfm_block_count = 8; + for (int j = 0; j < txfm_stats->coeff_num; j++) { + txfm_stats->abs_coeff_sum[j] = 1000 + j; + } + av1_tpl_txfm_stats_update_abs_coeff_mean(txfm_stats); + } +} + +/* + * Helper method to brute-force search for the closest q_index + * that achieves the specified bit budget. 
+ */ +int find_gop_q_iterative(double bit_budget, aom_bit_depth_t bit_depth, + const double *update_type_scale_factors, + int frame_count, + const FRAME_UPDATE_TYPE *update_type_list, + const double *qstep_ratio_list, + const TplTxfmStats *stats_list, int *q_index_list, + double *estimated_bitrate_byframe) { + int best_q = 255; + double curr_estimate = av1_vbr_rc_info_estimate_gop_bitrate( + best_q, bit_depth, update_type_scale_factors, frame_count, + update_type_list, qstep_ratio_list, stats_list, q_index_list, + estimated_bitrate_byframe); + double min_bits_diff = fabs(curr_estimate - bit_budget); + // Start at q = 254 because we already have an estimate for q = 255. + for (int q = 254; q >= 0; q--) { + curr_estimate = av1_vbr_rc_info_estimate_gop_bitrate( + q, bit_depth, update_type_scale_factors, frame_count, update_type_list, + qstep_ratio_list, stats_list, q_index_list, estimated_bitrate_byframe); + double bits_diff = fabs(curr_estimate - bit_budget); + if (bits_diff <= min_bits_diff) { + min_bits_diff = bits_diff; + best_q = q; + } + } + return best_q; +} + +TEST(TplModelTest, EstimateFrameRateTest) { + GF_GROUP gf_group; + init_toy_gf_group(&gf_group); + + VBR_RATECTRL_INFO vbr_rc_info; + init_toy_vbr_rc_info(&vbr_rc_info, gf_group.size); + + std::vector<TplTxfmStats> stats_list(gf_group.size); + init_toy_tpl_txfm_stats(&stats_list); + + // Receives the per-frame bitrate estimates that are summed as reference. + std::vector<double> est_bitrate_list(gf_group.size); + const aom_bit_depth_t bit_depth = AOM_BITS_8; + + const int q = 125; + + // Case1: all scale factors are 0 + double scale_factors[FRAME_UPDATE_TYPES] = { 0 }; + double estimate = av1_vbr_rc_info_estimate_gop_bitrate( + q, bit_depth, scale_factors, gf_group.size, gf_group.update_type, + vbr_rc_info.qstep_ratio_list, stats_list.data(), vbr_rc_info.q_index_list, + est_bitrate_list.data()); + EXPECT_NEAR(estimate, 0, epsilon); + + // Case2: all scale factors are 1 + for (int i = 0; i < FRAME_UPDATE_TYPES; i++) { + scale_factors[i] = 
1; + } + estimate = av1_vbr_rc_info_estimate_gop_bitrate( + q, bit_depth, scale_factors, gf_group.size, gf_group.update_type, + vbr_rc_info.qstep_ratio_list, stats_list.data(), vbr_rc_info.q_index_list, + est_bitrate_list.data()); + double ref_estimate = 0; + for (int i = 0; i < gf_group.size; i++) { + ref_estimate += est_bitrate_list[i]; + } + EXPECT_NEAR(estimate, ref_estimate, epsilon); + + // Case3: Key frame scale factor is 0 and others are 1 + for (int i = 0; i < FRAME_UPDATE_TYPES; i++) { + if (i == KF_UPDATE) { + scale_factors[i] = 0; + } else { + scale_factors[i] = 1; + } + } + estimate = av1_vbr_rc_info_estimate_gop_bitrate( + q, bit_depth, scale_factors, gf_group.size, gf_group.update_type, + vbr_rc_info.qstep_ratio_list, stats_list.data(), vbr_rc_info.q_index_list, + est_bitrate_list.data()); + ref_estimate = 0; + for (int i = 0; i < gf_group.size; i++) { + if (gf_group.update_type[i] != KF_UPDATE) { + ref_estimate += est_bitrate_list[i]; + } + } + EXPECT_NEAR(estimate, ref_estimate, epsilon); +} + +TEST(TplModelTest, VbrRcInfoEstimateBaseQTest) { + GF_GROUP gf_group; + init_toy_gf_group(&gf_group); + + VBR_RATECTRL_INFO vbr_rc_info; + init_toy_vbr_rc_info(&vbr_rc_info, gf_group.size); + + std::vector<TplTxfmStats> stats_list(gf_group.size); + init_toy_tpl_txfm_stats(&stats_list); + const aom_bit_depth_t bit_depth = AOM_BITS_8; + + // Test multiple bit budgets. + const std::vector<double> bit_budgets = { 0, 2470, 19200, 30750, + 41315, 65017, DBL_MAX }; + + for (double bit_budget : bit_budgets) { + // Binary search method to find the optimal q. 
+ const int base_q = av1_vbr_rc_info_estimate_base_q( + bit_budget, bit_depth, vbr_rc_info.scale_factors, gf_group.size, + gf_group.update_type, vbr_rc_info.qstep_ratio_list, stats_list.data(), + vbr_rc_info.q_index_list, nullptr); + const int ref_base_q = find_gop_q_iterative( + bit_budget, bit_depth, vbr_rc_info.scale_factors, gf_group.size, + gf_group.update_type, vbr_rc_info.qstep_ratio_list, stats_list.data(), + vbr_rc_info.q_index_list, nullptr); + if (bit_budget == 0) { + EXPECT_EQ(base_q, 255); + } else if (bit_budget == DBL_MAX) { + EXPECT_EQ(base_q, 0); + } + EXPECT_EQ(base_q, ref_base_q); + } +} +#endif // CONFIG_BITRATE_ACCURACY + +} // namespace diff --git a/third_party/aom/test/transform_test_base.h b/third_party/aom/test/transform_test_base.h new file mode 100644 index 0000000000..55e78fef48 --- /dev/null +++ b/third_party/aom/test/transform_test_base.h @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AOM_TEST_TRANSFORM_TEST_BASE_H_ +#define AOM_TEST_TRANSFORM_TEST_BASE_H_ + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "aom/aom_codec.h" +#include "aom_dsp/txfm_common.h" +#include "aom_mem/aom_mem.h" +#include "test/acm_random.h" + +namespace libaom_test { + +// Note: +// Same constant are defined in av1/common/av1_entropy.h and +// av1/common/entropy.h. Goal is to make this base class +// to use for future codec transform testing. 
But including +// either of them would lead to compiling error when we do +// unit test for another codec. Suggest to move the definition +// to a aom header file. +const int kDctMaxValue = 16384; + +template <typename OutputType> +using FhtFunc = void (*)(const int16_t *in, OutputType *out, int stride, + TxfmParam *txfm_param); + +template <typename OutputType> +using IhtFunc = void (*)(const tran_low_t *in, uint8_t *out, int stride, + const TxfmParam *txfm_param); + +template <typename OutType> +class TransformTestBase { + public: + virtual ~TransformTestBase() = default; + + protected: + virtual void RunFwdTxfm(const int16_t *in, OutType *out, int stride) = 0; + + virtual void RunInvTxfm(const OutType *out, uint8_t *dst, int stride) = 0; + + void RunAccuracyCheck(uint32_t ref_max_error, double ref_avg_error) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + uint32_t max_error = 0; + int64_t total_error = 0; + const int count_test_block = 10000; + + int16_t *test_input_block = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * num_coeffs_)); + ASSERT_NE(test_input_block, nullptr); + OutType *test_temp_block = reinterpret_cast<OutType *>( + aom_memalign(16, sizeof(test_temp_block[0]) * num_coeffs_)); + ASSERT_NE(test_temp_block, nullptr); + uint8_t *dst = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * num_coeffs_)); + ASSERT_NE(dst, nullptr); + uint8_t *src = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * num_coeffs_)); + ASSERT_NE(src, nullptr); + uint16_t *dst16 = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * num_coeffs_)); + ASSERT_NE(dst16, nullptr); + uint16_t *src16 = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * num_coeffs_)); + ASSERT_NE(src16, nullptr); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-255, 255]. 
+ for (int j = 0; j < num_coeffs_; ++j) { + if (bit_depth_ == AOM_BITS_8) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + test_input_block[j] = src[j] - dst[j]; + } else { + src16[j] = rnd.Rand16() & mask_; + dst16[j] = rnd.Rand16() & mask_; + test_input_block[j] = src16[j] - dst16[j]; + } + } + + API_REGISTER_STATE_CHECK( + RunFwdTxfm(test_input_block, test_temp_block, pitch_)); + if (bit_depth_ == AOM_BITS_8) { + API_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_)); + } else { + API_REGISTER_STATE_CHECK( + RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_)); + } + + for (int j = 0; j < num_coeffs_; ++j) { + const int diff = + bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; + const uint32_t error = diff * diff; + if (max_error < error) max_error = error; + total_error += error; + } + } + + double avg_error = total_error * 1. / count_test_block / num_coeffs_; + + EXPECT_GE(ref_max_error, max_error) + << "Error: FHT/IHT has an individual round trip error > " + << ref_max_error; + + EXPECT_GE(ref_avg_error, avg_error) + << "Error: FHT/IHT has average round trip error > " << ref_avg_error + << " per block"; + + aom_free(test_input_block); + aom_free(test_temp_block); + aom_free(dst); + aom_free(src); + aom_free(dst16); + aom_free(src16); + } + + void RunCoeffCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 5000; + + // Use a stride value which is not the width of any transform, to catch + // cases where the transforms use the stride incorrectly. 
+ int stride = 96; + + int16_t *input_block = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * stride * height_)); + ASSERT_NE(input_block, nullptr); + OutType *output_ref_block = reinterpret_cast<OutType *>( + aom_memalign(16, sizeof(output_ref_block[0]) * num_coeffs_)); + ASSERT_NE(output_ref_block, nullptr); + OutType *output_block = reinterpret_cast<OutType *>( + aom_memalign(16, sizeof(output_block[0]) * num_coeffs_)); + ASSERT_NE(output_block, nullptr); + + for (int i = 0; i < count_test_block; ++i) { + int j, k; + for (j = 0; j < height_; ++j) { + for (k = 0; k < pitch_; ++k) { + int in_idx = j * stride + k; + int out_idx = j * pitch_ + k; + input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); + if (bit_depth_ == AOM_BITS_8) { + output_block[out_idx] = output_ref_block[out_idx] = rnd.Rand8(); + } else { + output_block[out_idx] = output_ref_block[out_idx] = + rnd.Rand16() & mask_; + } + } + } + + fwd_txfm_ref(input_block, output_ref_block, stride, &txfm_param_); + API_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, stride)); + + // The minimum quant value is 4. + for (j = 0; j < height_; ++j) { + for (k = 0; k < pitch_; ++k) { + int out_idx = j * pitch_ + k; + ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx]) + << "Error: not bit-exact result at index: " << out_idx + << " at test block: " << i; + } + } + } + aom_free(input_block); + aom_free(output_ref_block); + aom_free(output_block); + } + + void RunInvCoeffCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 5000; + + // Use a stride value which is not the width of any transform, to catch + // cases where the transforms use the stride incorrectly. 
+ int stride = 96; + + int16_t *input_block = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * num_coeffs_)); + ASSERT_NE(input_block, nullptr); + OutType *trans_block = reinterpret_cast<OutType *>( + aom_memalign(16, sizeof(trans_block[0]) * num_coeffs_)); + ASSERT_NE(trans_block, nullptr); + uint8_t *output_block = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * stride * height_)); + ASSERT_NE(output_block, nullptr); + uint8_t *output_ref_block = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * stride * height_)); + ASSERT_NE(output_ref_block, nullptr); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + int j, k; + for (j = 0; j < height_; ++j) { + for (k = 0; k < pitch_; ++k) { + int in_idx = j * pitch_ + k; + int out_idx = j * stride + k; + input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); + output_ref_block[out_idx] = rnd.Rand16() & mask_; + output_block[out_idx] = output_ref_block[out_idx]; + } + } + + fwd_txfm_ref(input_block, trans_block, pitch_, &txfm_param_); + + inv_txfm_ref(trans_block, output_ref_block, stride, &txfm_param_); + API_REGISTER_STATE_CHECK(RunInvTxfm(trans_block, output_block, stride)); + + for (j = 0; j < height_; ++j) { + for (k = 0; k < pitch_; ++k) { + int out_idx = j * stride + k; + ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx]) + << "Error: not bit-exact result at index: " << out_idx + << " j = " << j << " k = " << k << " at test block: " << i; + } + } + } + aom_free(input_block); + aom_free(trans_block); + aom_free(output_ref_block); + aom_free(output_block); + } + + void RunMemCheck() { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 5000; + + int16_t *input_extreme_block = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * num_coeffs_)); + ASSERT_NE(input_extreme_block, nullptr); + OutType *output_ref_block = reinterpret_cast<OutType *>( 
+ aom_memalign(16, sizeof(output_ref_block[0]) * num_coeffs_)); + ASSERT_NE(output_ref_block, nullptr); + OutType *output_block = reinterpret_cast<OutType *>( + aom_memalign(16, sizeof(output_block[0]) * num_coeffs_)); + ASSERT_NE(output_block, nullptr); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + for (int j = 0; j < num_coeffs_; ++j) { + input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_; + } + if (i == 0) { + for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = mask_; + } else if (i == 1) { + for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = -mask_; + } + + fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, &txfm_param_); + API_REGISTER_STATE_CHECK( + RunFwdTxfm(input_extreme_block, output_block, pitch_)); + + int row_length = FindRowLength(); + // The minimum quant value is 4. + for (int j = 0; j < num_coeffs_; ++j) { + ASSERT_EQ(output_block[j], output_ref_block[j]) + << "Not bit-exact at test index: " << i << ", " + << "j = " << j << std::endl; + EXPECT_GE(row_length * kDctMaxValue << (bit_depth_ - 8), + abs(output_block[j])) + << "Error: NxN FDCT has coefficient larger than N*DCT_MAX_VALUE"; + } + } + aom_free(input_extreme_block); + aom_free(output_ref_block); + aom_free(output_block); + } + + void RunInvAccuracyCheck(int limit) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + + int16_t *in = reinterpret_cast<int16_t *>( + aom_memalign(16, sizeof(int16_t) * num_coeffs_)); + ASSERT_NE(in, nullptr); + OutType *coeff = reinterpret_cast<OutType *>( + aom_memalign(16, sizeof(coeff[0]) * num_coeffs_)); + ASSERT_NE(coeff, nullptr); + uint8_t *dst = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * num_coeffs_)); + ASSERT_NE(dst, nullptr); + uint8_t *src = reinterpret_cast<uint8_t *>( + aom_memalign(16, sizeof(uint8_t) * num_coeffs_)); + ASSERT_NE(src, nullptr); + + uint16_t *dst16 = 
reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * num_coeffs_)); + ASSERT_NE(dst16, nullptr); + uint16_t *src16 = reinterpret_cast<uint16_t *>( + aom_memalign(16, sizeof(uint16_t) * num_coeffs_)); + ASSERT_NE(src16, nullptr); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-mask_, mask_]. + for (int j = 0; j < num_coeffs_; ++j) { + if (bit_depth_ == AOM_BITS_8) { + src[j] = rnd.Rand8(); + dst[j] = rnd.Rand8(); + in[j] = src[j] - dst[j]; + } else { + src16[j] = rnd.Rand16() & mask_; + dst16[j] = rnd.Rand16() & mask_; + in[j] = src16[j] - dst16[j]; + } + } + + fwd_txfm_ref(in, coeff, pitch_, &txfm_param_); + + if (bit_depth_ == AOM_BITS_8) { + API_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_)); + } else { + API_REGISTER_STATE_CHECK( + RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_)); + } + + for (int j = 0; j < num_coeffs_; ++j) { + const int diff = + bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; + const uint32_t error = diff * diff; + ASSERT_GE(static_cast<uint32_t>(limit), error) + << "Error: 4x4 IDCT has error " << error << " at index " << j; + } + } + aom_free(in); + aom_free(coeff); + aom_free(dst); + aom_free(src); + aom_free(src16); + aom_free(dst16); + } + + int pitch_; + int height_; + FhtFunc<OutType> fwd_txfm_ref; + IhtFunc<OutType> inv_txfm_ref; + aom_bit_depth_t bit_depth_; + int mask_; + int num_coeffs_; + TxfmParam txfm_param_; + + private: + // Assume transform size is 4x4, 8x8, 16x16,... 
+ int FindRowLength() const { + int row = 4; + if (16 == num_coeffs_) { + row = 4; + } else if (64 == num_coeffs_) { + row = 8; + } else if (256 == num_coeffs_) { + row = 16; + } else if (1024 == num_coeffs_) { + row = 32; + } + return row; + } +}; + +} // namespace libaom_test + +#endif // AOM_TEST_TRANSFORM_TEST_BASE_H_ diff --git a/third_party/aom/test/twopass_encoder.sh b/third_party/aom/test/twopass_encoder.sh new file mode 100755 index 0000000000..44e7327b8f --- /dev/null +++ b/third_party/aom/test/twopass_encoder.sh @@ -0,0 +1,54 @@ +#!/bin/sh +## Copyright (c) 2016, Alliance for Open Media. All rights reserved +## +## This source code is subject to the terms of the BSD 2 Clause License and +## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +## was not distributed with this source code in the LICENSE file, you can +## obtain it at www.aomedia.org/license/software. If the Alliance for Open +## Media Patent License 1.0 was not distributed with this source code in the +## PATENTS file, you can obtain it at www.aomedia.org/license/patent. +## +## This file tests the libaom twopass_encoder example. To add new tests to this +## file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to twopass_encoder_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required. +twopass_encoder_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH." + return 1 + fi +} + +# Runs twopass_encoder using the codec specified by $1 with a frame limit of +# 7. +twopass_encoder() { + local encoder="$(aom_tool_path twopass_encoder)" + local codec="$1" + local output_file="${AOM_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf" + local limit=7 + + if [ ! -x "${encoder}" ]; then + elog "${encoder} does not exist or is not executable." 
+ return 1 + fi + + eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ + "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" "${limit}" \ + ${devnull} || return 1 + + [ -e "${output_file}" ] || return 1 +} + +twopass_encoder_av1() { + if [ "$(av1_encode_available)" = "yes" ]; then + twopass_encoder av1 || return 1 + fi +} + +twopass_encoder_tests="twopass_encoder_av1" + +run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}" diff --git a/third_party/aom/test/util.h b/third_party/aom/test/util.h new file mode 100644 index 0000000000..29df709c4f --- /dev/null +++ b/third_party/aom/test/util.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
// Returns 1 when the text from the last '.' in |filename| onwards is exactly
// ".y4m" (case-sensitive), and 0 otherwise. A name with no '.' or whose only
// '.' is its first character (e.g. ".y4m") yields 0.
inline int is_extension_y4m(const char *filename) {
  const char *const ext = strrchr(filename, '.');
  const bool has_ext = (ext != nullptr) && (ext != filename);
  return has_ext ? (strcmp(ext, ".y4m") == 0) : 0;
}
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <cstdlib> +#include <new> +#include <ostream> +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "test/acm_random.h" +#include "test/register_state_check.h" +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_mem/aom_mem.h" +#include "aom_ports/aom_timer.h" +#include "aom_ports/mem.h" +#include "av1/common/cdef_block.h" + +namespace { + +typedef uint64_t (*MseWxH16bitFunc)(uint8_t *dst, int dstride, uint16_t *src, + int sstride, int w, int h); +typedef uint64_t (*Mse16xH16bitFunc)(uint8_t *dst, int dstride, uint16_t *src, + int w, int h); +typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse); +typedef void (*GetSseSum8x8QuadFunc)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + uint32_t *sse8x8, int *sum8x8, + unsigned int *tot_sse, int *tot_sum, + uint32_t *var8x8); +typedef void (*GetSseSum16x16DualFunc)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + uint32_t *sse16x16, + unsigned int *tot_sse, int *tot_sum, + uint32_t *var16x16); +typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride, + int xoffset, int yoffset, + const uint8_t *b, int b_stride, + unsigned int *sse); +typedef unsigned int (*SubpixAvgVarMxNFunc)(const uint8_t *a, int a_stride, + int xoffset, int yoffset, + const uint8_t *b, int b_stride, + uint32_t *sse, + const uint8_t *second_pred); +typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src); +typedef unsigned int (*DistWtdSubpixAvgVarMxNFunc)( + const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b, + int b_stride, uint32_t *sse, const uint8_t *second_pred, + 
// Reference sum of squares over a fixed block of 256 int16_t samples.
// Each sample is squared in int and accumulated into an unsigned int, so the
// total wraps modulo 2^32 if it exceeds the range of unsigned int.
static unsigned int mb_ss_ref(const int16_t *src) {
  const int kNumSamples = 256;
  unsigned int sum_sq = 0;
  const int16_t *const end = src + kNumSamples;
  for (const int16_t *p = src; p != end; ++p) {
    const int v = *p;
    sum_sq += v * v;
  }
  return sum_sq;
}
Only every other calculated value + * is used so the codec version shrinks the table to save space. + */ +static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src, + int l2w, int l2h, int xoff, int yoff, + uint32_t *sse_ptr, bool use_high_bit_depth_, + aom_bit_depth_t bit_depth) { + int64_t se = 0; + uint64_t sse = 0; + const int w = 1 << l2w; + const int h = 1 << l2h; + + xoff <<= 1; + yoff <<= 1; + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + // Bilinear interpolation at a 16th pel step. + if (!use_high_bit_depth_) { + const int a1 = ref[(w + 1) * (y + 0) + x + 0]; + const int a2 = ref[(w + 1) * (y + 0) + x + 1]; + const int b1 = ref[(w + 1) * (y + 1) + x + 0]; + const int b2 = ref[(w + 1) * (y + 1) + x + 1]; + const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); + const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); + const int r = a + (((b - a) * yoff + 8) >> 4); + const int diff = r - src[w * y + x]; + se += diff; + sse += diff * diff; + } else { + uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref); + uint16_t *src16 = CONVERT_TO_SHORTPTR(src); + const int a1 = ref16[(w + 1) * (y + 0) + x + 0]; + const int a2 = ref16[(w + 1) * (y + 0) + x + 1]; + const int b1 = ref16[(w + 1) * (y + 1) + x + 0]; + const int b2 = ref16[(w + 1) * (y + 1) + x + 1]; + const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); + const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); + const int r = a + (((b - a) * yoff + 8) >> 4); + const int diff = r - src16[w * y + x]; + se += diff; + sse += diff * diff; + } + } + } + RoundHighBitDepth(bit_depth, &se, &sse); + *sse_ptr = static_cast<uint32_t>(sse); + return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h))); +} + +static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src, + const uint8_t *second_pred, int l2w, + int l2h, int xoff, int yoff, + uint32_t *sse_ptr, + bool use_high_bit_depth, + aom_bit_depth_t bit_depth) { + int64_t se = 0; + uint64_t sse = 0; + const int w = 1 << l2w; + const int h 
// Reference for distance-weighted compound sub-pixel variance.
//
// For every pixel of the w x h block (w = 1 << l2w, h = 1 << l2h) this
// bilinearly interpolates |ref|, blends the result with |second_pred| using
// jcp_param->fwd_offset / jcp_param->bck_offset (rounded by
// DIST_PRECISION_BITS), and accumulates the difference against |src|.
// The sums are passed through RoundHighBitDepth(); the rounded SSE is written
// to *sse_ptr and the return value is sse - (se^2 >> (l2w + l2h)).
//
// NOTE(review): |ref| is indexed with a row stride of (w + 0) here, whereas
// the other subpel reference functions in this file use (w + 1). Reading
// column x + 1 on the last column therefore lands on the first sample of the
// next row. Presumably this mirrors the stride the caller passes to the
// function under test -- confirm against the calling test.
static uint32_t dist_wtd_subpel_avg_variance_ref(
    const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w,
    int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth,
    aom_bit_depth_t bit_depth, DIST_WTD_COMP_PARAMS *jcp_param) {
  int64_t se = 0;
  uint64_t sse = 0;
  const int w = 1 << l2w;
  const int h = 1 << l2h;

  // Double the offsets so the interpolation weights below are in sixteenths.
  xoff <<= 1;
  yoff <<= 1;

  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      // bilinear interpolation at a 16th pel step
      if (!use_high_bit_depth) {
        const int a1 = ref[(w + 0) * (y + 0) + x + 0];
        const int a2 = ref[(w + 0) * (y + 0) + x + 1];
        const int b1 = ref[(w + 0) * (y + 1) + x + 0];
        const int b2 = ref[(w + 0) * (y + 1) + x + 1];
        // Horizontal pass on the two rows, then vertical pass between them.
        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
        const int r = a + (((b - a) * yoff + 8) >> 4);
        // Weighted blend of the interpolated sample and the second predictor.
        const int avg = ROUND_POWER_OF_TWO(
            r * jcp_param->fwd_offset +
                second_pred[w * y + x] * jcp_param->bck_offset,
            DIST_PRECISION_BITS);
        const int diff = avg - src[w * y + x];

        se += diff;
        sse += diff * diff;
      } else {
        // High bit depth: same computation on the 16-bit views of the buffers.
        const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
        const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
        const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
        const int a1 = ref16[(w + 0) * (y + 0) + x + 0];
        const int a2 = ref16[(w + 0) * (y + 0) + x + 1];
        const int b1 = ref16[(w + 0) * (y + 1) + x + 0];
        const int b2 = ref16[(w + 0) * (y + 1) + x + 1];
        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
        const int r = a + (((b - a) * yoff + 8) >> 4);
        const int avg =
            ROUND_POWER_OF_TWO(r * jcp_param->fwd_offset +
                                   sec16[w * y + x] * jcp_param->bck_offset,
                               DIST_PRECISION_BITS);
        const int diff = avg - src16[w * y + x];

        se += diff;
        sse += diff * diff;
      }
    }
  }
  RoundHighBitDepth(bit_depth, &se, &sse);
  *sse_ptr = static_cast<uint32_t>(sse);
  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
}
+ if (!use_high_bit_depth_) { + const int a1 = pre[(w + 1) * (y + 0) + x + 0]; + const int a2 = pre[(w + 1) * (y + 0) + x + 1]; + const int b1 = pre[(w + 1) * (y + 1) + x + 0]; + const int b2 = pre[(w + 1) * (y + 1) + x + 1]; + const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); + const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); + const int r = a + (((b - a) * yoff + 8) >> 4); + const int diff = ROUND_POWER_OF_TWO_SIGNED( + wsrc[w * y + x] - r * mask[w * y + x], 12); + se += diff; + sse += diff * diff; + } else { + uint16_t *pre16 = CONVERT_TO_SHORTPTR(pre); + const int a1 = pre16[(w + 1) * (y + 0) + x + 0]; + const int a2 = pre16[(w + 1) * (y + 0) + x + 1]; + const int b1 = pre16[(w + 1) * (y + 1) + x + 0]; + const int b2 = pre16[(w + 1) * (y + 1) + x + 1]; + const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); + const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); + const int r = a + (((b - a) * yoff + 8) >> 4); + const int diff = ROUND_POWER_OF_TWO_SIGNED( + wsrc[w * y + x] - r * mask[w * y + x], 12); + se += diff; + sse += diff * diff; + } + } + } + RoundHighBitDepth(bit_depth, &se, &sse); + *sse_ptr = static_cast<uint32_t>(sse); + return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h))); +} +#endif + +//////////////////////////////////////////////////////////////////////////////// + +class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> { + public: + SumOfSquaresTest() : func_(GetParam()) {} + + ~SumOfSquaresTest() override = default; + + protected: + void ConstTest(); + void RefTest(); + + SumOfSquaresFunction func_; + ACMRandom rnd_; +}; + +void SumOfSquaresTest::ConstTest() { + int16_t mem[256]; + unsigned int res; + for (int v = 0; v < 256; ++v) { + for (int i = 0; i < 256; ++i) { + mem[i] = v; + } + API_REGISTER_STATE_CHECK(res = func_(mem)); + EXPECT_EQ(256u * (v * v), res); + } +} + +void SumOfSquaresTest::RefTest() { + int16_t mem[256]; + for (int i = 0; i < 100; ++i) { + for (int j = 0; j < 256; ++j) { + mem[j] = rnd_.Rand8() - 
rnd_.Rand8(); + } + + const unsigned int expected = mb_ss_ref(mem); + unsigned int res; + API_REGISTER_STATE_CHECK(res = func_(mem)); + EXPECT_EQ(expected, res); + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Encapsulating struct to store the function to test along with +// some testing context. +// Can be used for MSE, SSE, Variance, etc. + +template <typename Func> +struct TestParams { + TestParams(int log2w = 0, int log2h = 0, Func function = nullptr, + int bit_depth_value = 0) + : log2width(log2w), log2height(log2h), func(function) { + use_high_bit_depth = (bit_depth_value > 0); + if (use_high_bit_depth) { + bit_depth = static_cast<aom_bit_depth_t>(bit_depth_value); + } else { + bit_depth = AOM_BITS_8; + } + width = 1 << log2width; + height = 1 << log2height; + block_size = width * height; + mask = (1u << bit_depth) - 1; + } + + int log2width, log2height; + int width, height; + int block_size; + Func func; + aom_bit_depth_t bit_depth; + bool use_high_bit_depth; + uint32_t mask; +}; + +template <typename Func> +std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) { + return os << "width/height:" << p.width << "/" << p.height + << " function:" << reinterpret_cast<const void *>(p.func) + << " bit-depth:" << p.bit_depth; +} + +// Main class for testing a function type +template <typename FunctionType> +class MseWxHTestClass + : public ::testing::TestWithParam<TestParams<FunctionType> > { + public: + void SetUp() override { + params_ = this->GetParam(); + + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, block_size() * sizeof(src_))); + dst_ = reinterpret_cast<uint8_t *>( + aom_memalign(16, block_size() * sizeof(dst_))); + ASSERT_NE(src_, nullptr); + ASSERT_NE(dst_, nullptr); + } + + void TearDown() override { + aom_free(src_); + aom_free(dst_); + src_ = nullptr; + dst_ = nullptr; + } + + protected: + void RefMatchTestMse(); + void SpeedTest(); + + 
protected: + ACMRandom rnd_; + uint8_t *dst_; + uint16_t *src_; + TestParams<FunctionType> params_; + + // some relay helpers + int block_size() const { return params_.block_size; } + int width() const { return params_.width; } + int height() const { return params_.height; } + int d_stride() const { return params_.width; } // stride is same as width + int s_stride() const { return params_.width; } // stride is same as width +}; + +template <typename MseWxHFunctionType> +void MseWxHTestClass<MseWxHFunctionType>::SpeedTest() { + aom_usec_timer ref_timer, test_timer; + double elapsed_time_c = 0; + double elapsed_time_simd = 0; + int run_time = 10000000; + int w = width(); + int h = height(); + int dstride = d_stride(); + int sstride = s_stride(); + + for (int k = 0; k < block_size(); ++k) { + dst_[k] = rnd_.Rand8(); + src_[k] = rnd_.Rand8(); + } + aom_usec_timer_start(&ref_timer); + for (int i = 0; i < run_time; i++) { + aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h); + } + aom_usec_timer_mark(&ref_timer); + elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int i = 0; i < run_time; i++) { + params_.func(dst_, dstride, src_, sstride, w, h); + } + aom_usec_timer_mark(&test_timer); + elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer)); + + printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(), + elapsed_time_c, elapsed_time_simd, + (elapsed_time_c / elapsed_time_simd)); +} + +template <typename MseWxHFunctionType> +void MseWxHTestClass<MseWxHFunctionType>::RefMatchTestMse() { + uint64_t mse_ref = 0; + uint64_t mse_mod = 0; + int w = width(); + int h = height(); + int dstride = d_stride(); + int sstride = s_stride(); + + for (int i = 0; i < 10; i++) { + for (int k = 0; k < block_size(); ++k) { + dst_[k] = rnd_.Rand8(); + src_[k] = rnd_.Rand8(); + } + API_REGISTER_STATE_CHECK( + mse_ref = aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h)); + 
API_REGISTER_STATE_CHECK( + mse_mod = params_.func(dst_, dstride, src_, sstride, w, h)); + EXPECT_EQ(mse_ref, mse_mod) + << "ref mse: " << mse_ref << " mod mse: " << mse_mod; + } +} + +template <typename FunctionType> +class Mse16xHTestClass + : public ::testing::TestWithParam<TestParams<FunctionType> > { + public: + // Memory required to compute mse of two 8x8 and four 4x4 blocks assigned for + // maximum width 16 and maximum height 8. + int mem_size = 16 * 8; + void SetUp() override { + params_ = this->GetParam(); + rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, mem_size * sizeof(*src_))); + dst_ = + reinterpret_cast<uint8_t *>(aom_memalign(16, mem_size * sizeof(*dst_))); + ASSERT_NE(src_, nullptr); + ASSERT_NE(dst_, nullptr); + } + + void TearDown() override { + aom_free(src_); + aom_free(dst_); + src_ = nullptr; + dst_ = nullptr; + } + + uint8_t RandBool() { + const uint32_t value = rnd_.Rand8(); + return (value & 0x1); + } + + protected: + void RefMatchExtremeTestMse(); + void RefMatchTestMse(); + void SpeedTest(); + + protected: + ACMRandom rnd_; + uint8_t *dst_; + uint16_t *src_; + TestParams<FunctionType> params_; + + // some relay helpers + int width() const { return params_.width; } + int height() const { return params_.height; } + int d_stride() const { return params_.width; } +}; + +template <typename Mse16xHFunctionType> +void Mse16xHTestClass<Mse16xHFunctionType>::SpeedTest() { + aom_usec_timer ref_timer, test_timer; + double elapsed_time_c = 0.0; + double elapsed_time_simd = 0.0; + const int loop_count = 10000000; + const int w = width(); + const int h = height(); + const int dstride = d_stride(); + + for (int k = 0; k < mem_size; ++k) { + dst_[k] = rnd_.Rand8(); + // Right shift by 6 is done to generate more input in range of [0,255] than + // CDEF_VERY_LARGE + int rnd_i10 = rnd_.Rand16() >> 6; + src_[k] = (rnd_i10 < 256) ? 
rnd_i10 : CDEF_VERY_LARGE; + } + + aom_usec_timer_start(&ref_timer); + for (int i = 0; i < loop_count; i++) { + aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h); + } + aom_usec_timer_mark(&ref_timer); + elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int i = 0; i < loop_count; i++) { + params_.func(dst_, dstride, src_, w, h); + } + aom_usec_timer_mark(&test_timer); + elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer)); + + printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%.31f\n", width(), + height(), elapsed_time_c, elapsed_time_simd, + (elapsed_time_c / elapsed_time_simd)); +} + +template <typename Mse16xHFunctionType> +void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchTestMse() { + uint64_t mse_ref = 0; + uint64_t mse_mod = 0; + const int w = width(); + const int h = height(); + const int dstride = d_stride(); + + for (int i = 0; i < 10; i++) { + for (int k = 0; k < mem_size; ++k) { + dst_[k] = rnd_.Rand8(); + // Right shift by 6 is done to generate more input in range of [0,255] + // than CDEF_VERY_LARGE + int rnd_i10 = rnd_.Rand16() >> 6; + src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE; + } + + API_REGISTER_STATE_CHECK( + mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h)); + API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h)); + EXPECT_EQ(mse_ref, mse_mod) + << "ref mse: " << mse_ref << " mod mse: " << mse_mod; + } +} + +template <typename Mse16xHFunctionType> +void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchExtremeTestMse() { + uint64_t mse_ref = 0; + uint64_t mse_mod = 0; + const int w = width(); + const int h = height(); + const int dstride = d_stride(); + const int iter = 10; + + // Fill the buffers with extreme values + for (int i = 0; i < iter; i++) { + for (int k = 0; k < mem_size; ++k) { + dst_[k] = static_cast<uint8_t>(RandBool() ? 0 : 255); + src_[k] = static_cast<uint16_t>(RandBool() ? 
0 : CDEF_VERY_LARGE); + } + + API_REGISTER_STATE_CHECK( + mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h)); + API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h)); + EXPECT_EQ(mse_ref, mse_mod) + << "ref mse: " << mse_ref << " mod mse: " << mse_mod; + } +} + +// Main class for testing a function type +template <typename FunctionType> +class MainTestClass + : public ::testing::TestWithParam<TestParams<FunctionType> > { + public: + void SetUp() override { + params_ = this->GetParam(); + + rnd_.Reset(ACMRandom::DeterministicSeed()); + const size_t unit = + use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t); + src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size() * unit)); + ref_ = new uint8_t[block_size() * unit]; + ASSERT_NE(src_, nullptr); + ASSERT_NE(ref_, nullptr); + memset(src_, 0, block_size() * sizeof(src_[0])); + memset(ref_, 0, block_size() * sizeof(ref_[0])); + if (use_high_bit_depth()) { + // TODO(skal): remove! + src_ = CONVERT_TO_BYTEPTR(src_); + ref_ = CONVERT_TO_BYTEPTR(ref_); + } + } + + void TearDown() override { + if (use_high_bit_depth()) { + // TODO(skal): remove! + src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_)); + ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_)); + } + + aom_free(src_); + delete[] ref_; + src_ = nullptr; + ref_ = nullptr; + } + + protected: + // We could sub-class MainTestClass into dedicated class for Variance + // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing + // to access top class fields xxx. 
That's cumbersome, so for now we'll just + // implement the testing methods here: + + // Variance tests + void ZeroTest(); + void RefTest(); + void RefStrideTest(); + void OneQuarterTest(); + void SpeedTest(); + + // SSE&SUM tests + void RefTestSseSum(); + void MinTestSseSum(); + void MaxTestSseSum(); + void SseSum_SpeedTest(); + + // SSE&SUM dual tests + void RefTestSseSumDual(); + void MinTestSseSumDual(); + void MaxTestSseSumDual(); + void SseSum_SpeedTestDual(); + + // MSE/SSE tests + void RefTestMse(); + void RefTestSse(); + void MaxTestMse(); + void MaxTestSse(); + + protected: + ACMRandom rnd_; + uint8_t *src_; + uint8_t *ref_; + TestParams<FunctionType> params_; + + // some relay helpers + bool use_high_bit_depth() const { return params_.use_high_bit_depth; } + int byte_shift() const { return params_.bit_depth - 8; } + int block_size() const { return params_.block_size; } + int width() const { return params_.width; } + int height() const { return params_.height; } + uint32_t mask() const { return params_.mask; } +}; + +//////////////////////////////////////////////////////////////////////////////// +// Tests related to variance. 
+ +template <typename VarianceFunctionType> +void MainTestClass<VarianceFunctionType>::ZeroTest() { + for (int i = 0; i <= 255; ++i) { + if (!use_high_bit_depth()) { + memset(src_, i, block_size()); + } else { + uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_); + for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift(); + } + for (int j = 0; j <= 255; ++j) { + if (!use_high_bit_depth()) { + memset(ref_, j, block_size()); + } else { + uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_); + for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift(); + } + unsigned int sse, var; + API_REGISTER_STATE_CHECK( + var = params_.func(src_, width(), ref_, width(), &sse)); + EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j; + } + } +} + +template <typename VarianceFunctionType> +void MainTestClass<VarianceFunctionType>::RefTest() { + for (int i = 0; i < 10; ++i) { + for (int j = 0; j < block_size(); j++) { + if (!use_high_bit_depth()) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); + } else { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); + CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); + } + } + unsigned int sse1, sse2, var1, var2; + const int stride = width(); + API_REGISTER_STATE_CHECK( + var1 = params_.func(src_, stride, ref_, stride, &sse1)); + var2 = + variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, + stride, &sse2, use_high_bit_depth(), params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; + EXPECT_EQ(var1, var2) << "Error at test index: " << i; + } +} + +template <typename VarianceFunctionType> +void MainTestClass<VarianceFunctionType>::RefStrideTest() { + for (int i = 0; i < 10; ++i) { + const int ref_stride = (i & 1) * width(); + const int src_stride = ((i >> 1) & 1) * width(); + for (int j = 0; j < block_size(); j++) { + const int ref_ind = (j / width()) * ref_stride + j % width(); + const int src_ind = (j / width()) * src_stride + j % width(); + if 
(!use_high_bit_depth()) { + src_[src_ind] = rnd_.Rand8(); + ref_[ref_ind] = rnd_.Rand8(); + } else { + CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask(); + CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask(); + } + } + unsigned int sse1, sse2; + unsigned int var1, var2; + + API_REGISTER_STATE_CHECK( + var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1)); + var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height, + src_stride, ref_stride, &sse2, use_high_bit_depth(), + params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; + EXPECT_EQ(var1, var2) << "Error at test index: " << i; + } +} + +template <typename VarianceFunctionType> +void MainTestClass<VarianceFunctionType>::OneQuarterTest() { + const int half = block_size() / 2; + if (!use_high_bit_depth()) { + memset(src_, 255, block_size()); + memset(ref_, 255, half); + memset(ref_ + half, 0, half); + } else { + aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size()); + aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half); + aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half); + } + unsigned int sse, var, expected; + API_REGISTER_STATE_CHECK( + var = params_.func(src_, width(), ref_, width(), &sse)); + expected = block_size() * 255 * 255 / 4; + EXPECT_EQ(expected, var); +} + +template <typename VarianceFunctionType> +void MainTestClass<VarianceFunctionType>::SpeedTest() { + for (int j = 0; j < block_size(); j++) { + if (!use_high_bit_depth()) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); +#if CONFIG_AV1_HIGHBITDEPTH + } else { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); + CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); +#endif // CONFIG_AV1_HIGHBITDEPTH + } + } + unsigned int sse; + const int stride = width(); + int run_time = 1000000000 / block_size(); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_time; ++i) { + params_.func(src_, stride, ref_, stride, 
// Compares the function under test against aom_get_var_sse_sum_8x8_quad_c on
// ten randomly filled 8-bit blocks. Per-call outputs (sse / sum / var) and the
// running totals must match exactly.
template <typename GetSseSum8x8QuadFuncType>
void MainTestClass<GetSseSum8x8QuadFuncType>::RefTestSseSum() {
  for (int i = 0; i < 10; ++i) {
    for (int j = 0; j < block_size(); ++j) {
      src_[j] = rnd_.Rand8();
      ref_[j] = rnd_.Rand8();
    }
    // Output slots: index 1 = function under test, index 2 = C reference.
    unsigned int sse1[256] = { 0 };
    unsigned int sse2[256] = { 0 };
    unsigned int var1[256] = { 0 };
    unsigned int var2[256] = { 0 };
    int sum1[256] = { 0 };
    int sum2[256] = { 0 };
    unsigned int sse_tot_c = 0;
    unsigned int sse_tot_simd = 0;
    int sum_tot_c = 0;
    int sum_tot_simd = 0;
    const int stride = width();
    int k = 0;

    // Walk the block in steps of 8 rows x 32 columns, advancing the output
    // index by 4 per call.
    // NOTE(review): presumably each call covers four side-by-side 8x8 blocks
    // and writes four consecutive sse/sum/var entries, and width() is expected
    // to be a multiple of 32 -- confirm against the quad function's contract
    // and the instantiated block sizes.
    for (int row = 0; row < height(); row += 8) {
      for (int col = 0; col < width(); col += 32) {
        API_REGISTER_STATE_CHECK(params_.func(src_ + stride * row + col, stride,
                                              ref_ + stride * row + col, stride,
                                              &sse1[k], &sum1[k], &sse_tot_simd,
                                              &sum_tot_simd, &var1[k]));
        aom_get_var_sse_sum_8x8_quad_c(
            src_ + stride * row + col, stride, ref_ + stride * row + col,
            stride, &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
        k += 4;
      }
    }
    EXPECT_EQ(sse_tot_c, sse_tot_simd);
    EXPECT_EQ(sum_tot_c, sum_tot_simd);
    // All 256 slots are compared; slots never written stay zero on both sides.
    for (int p = 0; p < 256; p++) {
      EXPECT_EQ(sse1[p], sse2[p]);
      EXPECT_EQ(sum1[p], sum2[p]);
      EXPECT_EQ(var1[p], var2[p]);
    }
  }
}
i += 8) { + for (int j = 0; j < width(); j += 32) { + API_REGISTER_STATE_CHECK(params_.func( + src_ + stride * i + j, stride, ref_ + stride * i + j, stride, + &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); + aom_get_var_sse_sum_8x8_quad_c( + src_ + stride * i + j, stride, ref_ + stride * i + j, stride, + &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]); + k += 4; + } + } + EXPECT_EQ(sse_tot_simd, sse_tot_c); + EXPECT_EQ(sum_tot_simd, sum_tot_c); + for (int p = 0; p < 256; p++) { + EXPECT_EQ(sse1[p], sse2[p]); + EXPECT_EQ(sum1[p], sum2[p]); + EXPECT_EQ(var1[p], var2[p]); + } +} + +template <typename GetSseSum8x8QuadFuncType> +void MainTestClass<GetSseSum8x8QuadFuncType>::MaxTestSseSum() { + memset(src_, 255, block_size()); + memset(ref_, 0, block_size()); + unsigned int sse1[256] = { 0 }; + unsigned int sse2[256] = { 0 }; + unsigned int var1[256] = { 0 }; + unsigned int var2[256] = { 0 }; + int sum1[256] = { 0 }; + int sum2[256] = { 0 }; + unsigned int sse_tot_c = 0; + unsigned int sse_tot_simd = 0; + int sum_tot_c = 0; + int sum_tot_simd = 0; + const int stride = width(); + int k = 0; + + for (int i = 0; i < height(); i += 8) { + for (int j = 0; j < width(); j += 32) { + API_REGISTER_STATE_CHECK(params_.func( + src_ + stride * i + j, stride, ref_ + stride * i + j, stride, + &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); + aom_get_var_sse_sum_8x8_quad_c( + src_ + stride * i + j, stride, ref_ + stride * i + j, stride, + &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]); + k += 4; + } + } + EXPECT_EQ(sse_tot_c, sse_tot_simd); + EXPECT_EQ(sum_tot_c, sum_tot_simd); + + for (int p = 0; p < 256; p++) { + EXPECT_EQ(sse1[p], sse2[p]); + EXPECT_EQ(sum1[p], sum2[p]); + EXPECT_EQ(var1[p], var2[p]); + } +} + +template <typename GetSseSum8x8QuadFuncType> +void MainTestClass<GetSseSum8x8QuadFuncType>::SseSum_SpeedTest() { + const int loop_count = 1000000000 / block_size(); + for (int j = 0; j < block_size(); ++j) { + src_[j] = rnd_.Rand8(); 
+ ref_[j] = rnd_.Rand8(); + } + + unsigned int sse1[4] = { 0 }; + unsigned int sse2[4] = { 0 }; + unsigned int var1[4] = { 0 }; + unsigned int var2[4] = { 0 }; + int sum1[4] = { 0 }; + int sum2[4] = { 0 }; + unsigned int sse_tot_c = 0; + unsigned int sse_tot_simd = 0; + int sum_tot_c = 0; + int sum_tot_simd = 0; + const int stride = width(); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int r = 0; r < loop_count; ++r) { + for (int i = 0; i < height(); i += 8) { + for (int j = 0; j < width(); j += 32) { + aom_get_var_sse_sum_8x8_quad_c(src_ + stride * i + j, stride, + ref_ + stride * i + j, stride, sse2, + sum2, &sse_tot_c, &sum_tot_c, var2); + } + } + } + aom_usec_timer_mark(&timer); + const double elapsed_time_ref = + static_cast<double>(aom_usec_timer_elapsed(&timer)); + + aom_usec_timer_start(&timer); + for (int r = 0; r < loop_count; ++r) { + for (int i = 0; i < height(); i += 8) { + for (int j = 0; j < width(); j += 32) { + params_.func(src_ + stride * i + j, stride, ref_ + stride * i + j, + stride, sse1, sum1, &sse_tot_simd, &sum_tot_simd, var1); + } + } + } + aom_usec_timer_mark(&timer); + const double elapsed_time_simd = + static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf( + "aom_getvar_8x8_quad for block=%dx%d : ref_time=%lf \t simd_time=%lf \t " + "gain=%lf \n", + width(), height(), elapsed_time_ref, elapsed_time_simd, + elapsed_time_ref / elapsed_time_simd); +} + +template <typename GetSseSum16x16DualFuncType> +void MainTestClass<GetSseSum16x16DualFuncType>::RefTestSseSumDual() { + for (int iter = 0; iter < 10; ++iter) { + for (int idx = 0; idx < block_size(); ++idx) { + src_[idx] = rnd_.Rand8(); + ref_[idx] = rnd_.Rand8(); + } + unsigned int sse1[64] = { 0 }; + unsigned int sse2[64] = { 0 }; + unsigned int var1[64] = { 0 }; + unsigned int var2[64] = { 0 }; + unsigned int sse_tot_c = 0; + unsigned int sse_tot_simd = 0; + int sum_tot_c = 0; + int sum_tot_simd = 0; + const int stride = width(); + int k = 0; + + for (int row 
= 0; row < height(); row += 16) { + for (int col = 0; col < width(); col += 32) { + API_REGISTER_STATE_CHECK(params_.func( + src_ + stride * row + col, stride, ref_ + stride * row + col, + stride, &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); + aom_get_var_sse_sum_16x16_dual_c( + src_ + stride * row + col, stride, ref_ + stride * row + col, + stride, &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]); + k += 2; + } + } + EXPECT_EQ(sse_tot_c, sse_tot_simd); + EXPECT_EQ(sum_tot_c, sum_tot_simd); + for (int p = 0; p < 64; p++) { + EXPECT_EQ(sse1[p], sse2[p]); + EXPECT_EQ(sse_tot_simd, sse_tot_c); + EXPECT_EQ(sum_tot_simd, sum_tot_c); + EXPECT_EQ(var1[p], var2[p]); + } + } +} + +template <typename GetSseSum16x16DualFuncType> +void MainTestClass<GetSseSum16x16DualFuncType>::MinTestSseSumDual() { + memset(src_, 0, block_size()); + memset(ref_, 255, block_size()); + unsigned int sse1[64] = { 0 }; + unsigned int sse2[64] = { 0 }; + unsigned int var1[64] = { 0 }; + unsigned int var2[64] = { 0 }; + unsigned int sse_tot_c = 0; + unsigned int sse_tot_simd = 0; + int sum_tot_c = 0; + int sum_tot_simd = 0; + const int stride = width(); + int k = 0; + + for (int row = 0; row < height(); row += 16) { + for (int col = 0; col < width(); col += 32) { + API_REGISTER_STATE_CHECK(params_.func( + src_ + stride * row + col, stride, ref_ + stride * row + col, stride, + &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); + aom_get_var_sse_sum_16x16_dual_c( + src_ + stride * row + col, stride, ref_ + stride * row + col, stride, + &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]); + k += 2; + } + } + EXPECT_EQ(sse_tot_simd, sse_tot_c); + EXPECT_EQ(sum_tot_simd, sum_tot_c); + for (int p = 0; p < 64; p++) { + EXPECT_EQ(sse1[p], sse2[p]); + EXPECT_EQ(var1[p], var2[p]); + } +} + +template <typename GetSseSum16x16DualFuncType> +void MainTestClass<GetSseSum16x16DualFuncType>::MaxTestSseSumDual() { + memset(src_, 255, block_size()); + memset(ref_, 0, block_size()); + unsigned int sse1[64] = { 0 }; + 
unsigned int sse2[64] = { 0 }; + unsigned int var1[64] = { 0 }; + unsigned int var2[64] = { 0 }; + unsigned int sse_tot_c = 0; + unsigned int sse_tot_simd = 0; + int sum_tot_c = 0; + int sum_tot_simd = 0; + const int stride = width(); + int k = 0; + + for (int row = 0; row < height(); row += 16) { + for (int col = 0; col < width(); col += 32) { + API_REGISTER_STATE_CHECK(params_.func( + src_ + stride * row + col, stride, ref_ + stride * row + col, stride, + &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); + aom_get_var_sse_sum_16x16_dual_c( + src_ + stride * row + col, stride, ref_ + stride * row + col, stride, + &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]); + k += 2; + } + } + EXPECT_EQ(sse_tot_c, sse_tot_simd); + EXPECT_EQ(sum_tot_c, sum_tot_simd); + + for (int p = 0; p < 64; p++) { + EXPECT_EQ(sse1[p], sse2[p]); + EXPECT_EQ(var1[p], var2[p]); + } +} + +template <typename GetSseSum16x16DualFuncType> +void MainTestClass<GetSseSum16x16DualFuncType>::SseSum_SpeedTestDual() { + const int loop_count = 1000000000 / block_size(); + for (int idx = 0; idx < block_size(); ++idx) { + src_[idx] = rnd_.Rand8(); + ref_[idx] = rnd_.Rand8(); + } + + unsigned int sse1[2] = { 0 }; + unsigned int sse2[2] = { 0 }; + unsigned int var1[2] = { 0 }; + unsigned int var2[2] = { 0 }; + unsigned int sse_tot_c = 0; + unsigned int sse_tot_simd = 0; + int sum_tot_c = 0; + int sum_tot_simd = 0; + const int stride = width(); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int r = 0; r < loop_count; ++r) { + for (int row = 0; row < height(); row += 16) { + for (int col = 0; col < width(); col += 32) { + aom_get_var_sse_sum_16x16_dual_c(src_ + stride * row + col, stride, + ref_ + stride * row + col, stride, + sse2, &sse_tot_c, &sum_tot_c, var2); + } + } + } + aom_usec_timer_mark(&timer); + const double elapsed_time_ref = + static_cast<double>(aom_usec_timer_elapsed(&timer)); + + aom_usec_timer_start(&timer); + for (int r = 0; r < loop_count; ++r) { + for (int row = 0; row < 
height(); row += 16) { + for (int col = 0; col < width(); col += 32) { + params_.func(src_ + stride * row + col, stride, + ref_ + stride * row + col, stride, sse1, &sse_tot_simd, + &sum_tot_simd, var1); + } + } + } + aom_usec_timer_mark(&timer); + const double elapsed_time_simd = + static_cast<double>(aom_usec_timer_elapsed(&timer)); + + printf( + "aom_getvar_16x16_dual for block=%dx%d : ref_time=%lf \t simd_time=%lf " + "\t " + "gain=%lf \n", + width(), height(), elapsed_time_ref, elapsed_time_simd, + elapsed_time_ref / elapsed_time_simd); +} + +//////////////////////////////////////////////////////////////////////////////// +// Tests related to MSE / SSE. + +template <typename FunctionType> +void MainTestClass<FunctionType>::RefTestMse() { + for (int i = 0; i < 10; ++i) { + for (int j = 0; j < block_size(); ++j) { + if (!use_high_bit_depth()) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); +#if CONFIG_AV1_HIGHBITDEPTH + } else { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); + CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); +#endif // CONFIG_AV1_HIGHBITDEPTH + } + } + unsigned int sse1, sse2; + const int stride = width(); + API_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1)); + variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, + stride, &sse2, use_high_bit_depth(), params_.bit_depth); + EXPECT_EQ(sse1, sse2); + } +} + +template <typename FunctionType> +void MainTestClass<FunctionType>::RefTestSse() { + for (int i = 0; i < 10; ++i) { + for (int j = 0; j < block_size(); ++j) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); + } + unsigned int sse2; + unsigned int var1; + const int stride = width(); + API_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride)); + variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, + stride, &sse2, false, AOM_BITS_8); + EXPECT_EQ(var1, sse2); + } +} + +template <typename FunctionType> +void 
MainTestClass<FunctionType>::MaxTestMse() { + int max_value = (1 << params_.bit_depth) - 1; + if (!use_high_bit_depth()) { + memset(src_, max_value, block_size()); + memset(ref_, 0, block_size()); +#if CONFIG_AV1_HIGHBITDEPTH + } else { + aom_memset16(CONVERT_TO_SHORTPTR(src_), max_value, block_size()); + aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, block_size()); +#endif // CONFIG_AV1_HIGHBITDEPTH + } + unsigned int sse; + API_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse)); + unsigned int expected = (unsigned int)block_size() * max_value * max_value; + switch (params_.bit_depth) { + case AOM_BITS_12: expected = ROUND_POWER_OF_TWO(expected, 8); break; + case AOM_BITS_10: expected = ROUND_POWER_OF_TWO(expected, 4); break; + case AOM_BITS_8: + default: break; + } + EXPECT_EQ(expected, sse); +} + +template <typename FunctionType> +void MainTestClass<FunctionType>::MaxTestSse() { + memset(src_, 255, block_size()); + memset(ref_, 0, block_size()); + unsigned int var; + API_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width())); + const unsigned int expected = block_size() * 255 * 255; + EXPECT_EQ(expected, var); +} + +//////////////////////////////////////////////////////////////////////////////// + +using std::get; +using std::make_tuple; +using std::tuple; + +template <typename FunctionType> +class SubpelVarianceTest + : public ::testing::TestWithParam<TestParams<FunctionType> > { + public: + void SetUp() override { + params_ = this->GetParam(); + + rnd_.Reset(ACMRandom::DeterministicSeed()); + if (!use_high_bit_depth()) { + src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size())); + sec_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size())); + ref_ = reinterpret_cast<uint8_t *>( + aom_memalign(32, block_size() + width() + height() + 1)); + } else { + src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>( + aom_memalign(32, block_size() * sizeof(uint16_t)))); + sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t 
*>( + aom_memalign(32, block_size() * sizeof(uint16_t)))); + ref_ = CONVERT_TO_BYTEPTR(aom_memalign( + 32, (block_size() + width() + height() + 1) * sizeof(uint16_t))); + } + ASSERT_NE(src_, nullptr); + ASSERT_NE(sec_, nullptr); + ASSERT_NE(ref_, nullptr); + } + + void TearDown() override { + if (!use_high_bit_depth()) { + aom_free(src_); + aom_free(ref_); + aom_free(sec_); + } else { + aom_free(CONVERT_TO_SHORTPTR(src_)); + aom_free(CONVERT_TO_SHORTPTR(ref_)); + aom_free(CONVERT_TO_SHORTPTR(sec_)); + } + } + + protected: + void RefTest(); + void ExtremeRefTest(); + void SpeedTest(); + + ACMRandom rnd_; + uint8_t *src_; + uint8_t *ref_; + uint8_t *sec_; + TestParams<FunctionType> params_; + DIST_WTD_COMP_PARAMS jcp_param_; + + // some relay helpers + bool use_high_bit_depth() const { return params_.use_high_bit_depth; } + int byte_shift() const { return params_.bit_depth - 8; } + int block_size() const { return params_.block_size; } + int width() const { return params_.width; } + int height() const { return params_.height; } + uint32_t mask() const { return params_.mask; } +}; + +template <typename SubpelVarianceFunctionType> +void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() { + for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + if (!use_high_bit_depth()) { + for (int j = 0; j < block_size(); j++) { + src_[j] = rnd_.Rand8(); + } + for (int j = 0; j < block_size() + width() + height() + 1; j++) { + ref_[j] = rnd_.Rand8(); + } + } else { + for (int j = 0; j < block_size(); j++) { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); + } + for (int j = 0; j < block_size() + width() + height() + 1; j++) { + CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); + } + } + unsigned int sse1, sse2; + unsigned int var1; + API_REGISTER_STATE_CHECK( + var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1)); + const unsigned int var2 = subpel_variance_ref( + ref_, src_, params_.log2width, params_.log2height, x, y, &sse2, + 
use_high_bit_depth(), params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; + EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; + } + } +} + +template <typename SubpelVarianceFunctionType> +void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() { + // Compare against reference. + // Src: Set the first half of values to 0, the second half to the maximum. + // Ref: Set the first half of values to the maximum, the second half to 0. + for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + const int half = block_size() / 2; + if (!use_high_bit_depth()) { + memset(src_, 0, half); + memset(src_ + half, 255, half); + memset(ref_, 255, half); + memset(ref_ + half, 0, half + width() + height() + 1); + } else { + aom_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half); + aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half); + aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half); + aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(), + half + width() + height() + 1); + } + unsigned int sse1, sse2; + unsigned int var1; + API_REGISTER_STATE_CHECK( + var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1)); + const unsigned int var2 = subpel_variance_ref( + ref_, src_, params_.log2width, params_.log2height, x, y, &sse2, + use_high_bit_depth(), params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; + EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; + } + } +} + +template <typename SubpelVarianceFunctionType> +void SubpelVarianceTest<SubpelVarianceFunctionType>::SpeedTest() { + if (!use_high_bit_depth()) { + for (int j = 0; j < block_size(); j++) { + src_[j] = rnd_.Rand8(); + } + for (int j = 0; j < block_size() + width() + height() + 1; j++) { + ref_[j] = rnd_.Rand8(); + } + } else { + for (int j = 0; j < block_size(); j++) { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); + } + for (int j = 0; j < block_size() + width() + height() + 1; j++) { + 
CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); + } + } + + unsigned int sse1, sse2; + int run_time = 1000000000 / block_size(); + aom_usec_timer timer; + + aom_usec_timer_start(&timer); + for (int i = 0; i < run_time; ++i) { + int x = rnd_(8); + int y = rnd_(8); + params_.func(ref_, width() + 1, x, y, src_, width(), &sse1); + } + aom_usec_timer_mark(&timer); + + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + + aom_usec_timer timer_c; + + aom_usec_timer_start(&timer_c); + for (int i = 0; i < run_time; ++i) { + int x = rnd_(8); + int y = rnd_(8); + subpel_variance_ref(ref_, src_, params_.log2width, params_.log2height, x, y, + &sse2, use_high_bit_depth(), params_.bit_depth); + } + aom_usec_timer_mark(&timer_c); + + const int elapsed_time_c = static_cast<int>(aom_usec_timer_elapsed(&timer_c)); + + printf( + "sub_pixel_variance_%dx%d_%d: ref_time=%d us opt_time=%d us gain=%d \n", + width(), height(), params_.bit_depth, elapsed_time_c, elapsed_time, + elapsed_time_c / elapsed_time); +} + +template <> +void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() { + for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + if (!use_high_bit_depth()) { + for (int j = 0; j < block_size(); j++) { + src_[j] = rnd_.Rand8(); + sec_[j] = rnd_.Rand8(); + } + for (int j = 0; j < block_size() + width() + height() + 1; j++) { + ref_[j] = rnd_.Rand8(); + } + } else { + for (int j = 0; j < block_size(); j++) { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); + CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask(); + } + for (int j = 0; j < block_size() + width() + height() + 1; j++) { + CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); + } + } + uint32_t sse1, sse2; + uint32_t var1, var2; + API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y, + src_, width(), &sse1, sec_)); + var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width, + params_.log2height, x, y, &sse2, + use_high_bit_depth(), 
params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; + EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; + } + } +} + +template <> +void SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>::RefTest() { + for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + if (!use_high_bit_depth()) { + for (int j = 0; j < block_size(); j++) { + src_[j] = rnd_.Rand8(); + sec_[j] = rnd_.Rand8(); + } + for (int j = 0; j < block_size() + width() + height() + 1; j++) { + ref_[j] = rnd_.Rand8(); + } + } else { + for (int j = 0; j < block_size(); j++) { + CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); + CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask(); + } + for (int j = 0; j < block_size() + width() + height() + 1; j++) { + CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); + } + } + for (int x0 = 0; x0 < 2; ++x0) { + for (int y0 = 0; y0 < 4; ++y0) { + uint32_t sse1, sse2; + uint32_t var1, var2; + jcp_param_.fwd_offset = quant_dist_lookup_table[y0][x0]; + jcp_param_.bck_offset = quant_dist_lookup_table[y0][1 - x0]; + API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 0, x, y, + src_, width(), &sse1, + sec_, &jcp_param_)); + var2 = dist_wtd_subpel_avg_variance_ref( + ref_, src_, sec_, params_.log2width, params_.log2height, x, y, + &sse2, use_high_bit_depth(), params_.bit_depth, &jcp_param_); + EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; + EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; + } + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// + +#if !CONFIG_REALTIME_ONLY + +static const int kMaskMax = 64; + +typedef TestParams<ObmcSubpelVarFunc> ObmcSubpelVarianceParams; + +template <typename FunctionType> +class ObmcVarianceTest + : public ::testing::TestWithParam<TestParams<FunctionType> > { + public: + void SetUp() override { + params_ = this->GetParam(); + + rnd_.Reset(ACMRandom::DeterministicSeed()); + if (!use_high_bit_depth()) { + pre_ = 
reinterpret_cast<uint8_t *>( + aom_memalign(32, block_size() + width() + height() + 1)); + } else { + pre_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(aom_memalign( + 32, (block_size() + width() + height() + 1) * sizeof(uint16_t)))); + } + wsrc_ = reinterpret_cast<int32_t *>( + aom_memalign(32, block_size() * sizeof(uint32_t))); + mask_ = reinterpret_cast<int32_t *>( + aom_memalign(32, block_size() * sizeof(uint32_t))); + ASSERT_NE(pre_, nullptr); + ASSERT_NE(wsrc_, nullptr); + ASSERT_NE(mask_, nullptr); + } + + void TearDown() override { + if (!use_high_bit_depth()) { + aom_free(pre_); + } else { + aom_free(CONVERT_TO_SHORTPTR(pre_)); + } + aom_free(wsrc_); + aom_free(mask_); + } + + protected: + void RefTest(); + void ExtremeRefTest(); + void SpeedTest(); + + ACMRandom rnd_; + uint8_t *pre_; + int32_t *wsrc_; + int32_t *mask_; + TestParams<FunctionType> params_; + + // some relay helpers + bool use_high_bit_depth() const { return params_.use_high_bit_depth; } + int byte_shift() const { return params_.bit_depth - 8; } + int block_size() const { return params_.block_size; } + int width() const { return params_.width; } + int height() const { return params_.height; } + uint32_t bd_mask() const { return params_.mask; } +}; + +template <> +void ObmcVarianceTest<ObmcSubpelVarFunc>::RefTest() { + for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + if (!use_high_bit_depth()) + for (int j = 0; j < block_size() + width() + height() + 1; j++) + pre_[j] = rnd_.Rand8(); + else + for (int j = 0; j < block_size() + width() + height() + 1; j++) + CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask(); + for (int j = 0; j < block_size(); j++) { + wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1); + mask_[j] = rnd_(kMaskMax * kMaskMax + 1); + } + + uint32_t sse1, sse2; + uint32_t var1, var2; + API_REGISTER_STATE_CHECK( + var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1)); + var2 = obmc_subpel_variance_ref( + pre_, 
params_.log2width, params_.log2height, x, y, wsrc_, mask_, + &sse2, use_high_bit_depth(), params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; + EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; + } + } +} + +template <> +void ObmcVarianceTest<ObmcSubpelVarFunc>::ExtremeRefTest() { + // Pre: Set the first half of values to the maximum, the second half to 0. + // Mask: same as above + // WSrc: Set the first half of values to 0, the second half to the maximum. + for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + const int half = block_size() / 2; + if (!use_high_bit_depth()) { + memset(pre_, 255, half); + memset(pre_ + half, 0, half + width() + height() + 1); + } else { + aom_memset16(CONVERT_TO_SHORTPTR(pre_), bd_mask(), half); + aom_memset16(CONVERT_TO_SHORTPTR(pre_) + half, 0, + half + width() + height() + 1); + } + for (int j = 0; j < half; j++) { + wsrc_[j] = bd_mask() * kMaskMax * kMaskMax; + mask_[j] = 0; + } + for (int j = half; j < block_size(); j++) { + wsrc_[j] = 0; + mask_[j] = kMaskMax * kMaskMax; + } + + uint32_t sse1, sse2; + uint32_t var1, var2; + API_REGISTER_STATE_CHECK( + var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1)); + var2 = obmc_subpel_variance_ref( + pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_, + &sse2, use_high_bit_depth(), params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; + EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; + } + } +} + +template <> +void ObmcVarianceTest<ObmcSubpelVarFunc>::SpeedTest() { + if (!use_high_bit_depth()) + for (int j = 0; j < block_size() + width() + height() + 1; j++) + pre_[j] = rnd_.Rand8(); + else + for (int j = 0; j < block_size() + width() + height() + 1; j++) + CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask(); + for (int j = 0; j < block_size(); j++) { + wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1); + 
mask_[j] = rnd_(kMaskMax * kMaskMax + 1); + } + unsigned int sse1; + const int stride = width() + 1; + int run_time = 1000000000 / block_size(); + aom_usec_timer timer; + + aom_usec_timer_start(&timer); + for (int i = 0; i < run_time; ++i) { + int x = rnd_(8); + int y = rnd_(8); + API_REGISTER_STATE_CHECK( + params_.func(pre_, stride, x, y, wsrc_, mask_, &sse1)); + } + aom_usec_timer_mark(&timer); + + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + printf("obmc_sub_pixel_variance_%dx%d_%d: %d us\n", width(), height(), + params_.bit_depth, elapsed_time); +} + +#endif // !CONFIG_REALTIME_ONLY + +typedef MseWxHTestClass<MseWxH16bitFunc> MseWxHTest; +typedef Mse16xHTestClass<Mse16xH16bitFunc> Mse16xHTest; +typedef MainTestClass<VarianceMxNFunc> AvxMseTest; +typedef MainTestClass<VarianceMxNFunc> AvxVarianceTest; +typedef MainTestClass<GetSseSum8x8QuadFunc> GetSseSum8x8QuadTest; +typedef MainTestClass<GetSseSum16x16DualFunc> GetSseSum16x16DualTest; +typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxSubpelVarianceTest; +typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxSubpelAvgVarianceTest; +typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc> + AvxDistWtdSubpelAvgVarianceTest; +#if !CONFIG_REALTIME_ONLY +typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxObmcSubpelVarianceTest; +#endif +typedef TestParams<MseWxH16bitFunc> MseWxHParams; +typedef TestParams<Mse16xH16bitFunc> Mse16xHParams; + +TEST_P(MseWxHTest, RefMse) { RefMatchTestMse(); } +TEST_P(MseWxHTest, DISABLED_SpeedMse) { SpeedTest(); } +TEST_P(Mse16xHTest, RefMse) { RefMatchTestMse(); } +TEST_P(Mse16xHTest, RefMseExtreme) { RefMatchExtremeTestMse(); } +TEST_P(Mse16xHTest, DISABLED_SpeedMse) { SpeedTest(); } +TEST_P(AvxMseTest, RefMse) { RefTestMse(); } +TEST_P(AvxMseTest, MaxMse) { MaxTestMse(); } +TEST_P(AvxVarianceTest, Zero) { ZeroTest(); } +TEST_P(AvxVarianceTest, Ref) { RefTest(); } +TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); } +TEST_P(AvxVarianceTest, OneQuarter) { 
OneQuarterTest(); } +TEST_P(AvxVarianceTest, DISABLED_Speed) { SpeedTest(); } +TEST_P(GetSseSum8x8QuadTest, RefMseSum) { RefTestSseSum(); } +TEST_P(GetSseSum8x8QuadTest, MinSseSum) { MinTestSseSum(); } +TEST_P(GetSseSum8x8QuadTest, MaxMseSum) { MaxTestSseSum(); } +TEST_P(GetSseSum8x8QuadTest, DISABLED_Speed) { SseSum_SpeedTest(); } +TEST_P(GetSseSum16x16DualTest, RefMseSum) { RefTestSseSumDual(); } +TEST_P(GetSseSum16x16DualTest, MinSseSum) { MinTestSseSumDual(); } +TEST_P(GetSseSum16x16DualTest, MaxMseSum) { MaxTestSseSumDual(); } +TEST_P(GetSseSum16x16DualTest, DISABLED_Speed) { SseSum_SpeedTestDual(); } +TEST_P(SumOfSquaresTest, Const) { ConstTest(); } +TEST_P(SumOfSquaresTest, Ref) { RefTest(); } +TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); } +TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } +TEST_P(AvxSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); } +TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); } +TEST_P(AvxDistWtdSubpelAvgVarianceTest, Ref) { RefTest(); } +#if !CONFIG_REALTIME_ONLY +TEST_P(AvxObmcSubpelVarianceTest, Ref) { RefTest(); } +TEST_P(AvxObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } +TEST_P(AvxObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); } +#endif + +INSTANTIATE_TEST_SUITE_P( + C, MseWxHTest, + ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_c, 8), + MseWxHParams(3, 2, &aom_mse_wxh_16bit_c, 8), + MseWxHParams(2, 3, &aom_mse_wxh_16bit_c, 8), + MseWxHParams(2, 2, &aom_mse_wxh_16bit_c, 8))); + +INSTANTIATE_TEST_SUITE_P( + C, Mse16xHTest, + ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_c, 8), + Mse16xHParams(3, 2, &aom_mse_16xh_16bit_c, 8), + Mse16xHParams(2, 3, &aom_mse_16xh_16bit_c, 8), + Mse16xHParams(2, 2, &aom_mse_16xh_16bit_c, 8))); + +INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest, + ::testing::Values(aom_get_mb_ss_c)); + +typedef TestParams<VarianceMxNFunc> MseParams; +INSTANTIATE_TEST_SUITE_P(C, AvxMseTest, + ::testing::Values(MseParams(4, 4, &aom_mse16x16_c), + MseParams(4, 3, 
&aom_mse16x8_c), + MseParams(3, 4, &aom_mse8x16_c), + MseParams(3, 3, &aom_mse8x8_c))); + +typedef TestParams<VarianceMxNFunc> VarianceParams; +const VarianceParams kArrayVariance_c[] = { + VarianceParams(7, 7, &aom_variance128x128_c), + VarianceParams(7, 6, &aom_variance128x64_c), + VarianceParams(6, 7, &aom_variance64x128_c), + VarianceParams(6, 6, &aom_variance64x64_c), + VarianceParams(6, 5, &aom_variance64x32_c), + VarianceParams(5, 6, &aom_variance32x64_c), + VarianceParams(5, 5, &aom_variance32x32_c), + VarianceParams(5, 4, &aom_variance32x16_c), + VarianceParams(4, 5, &aom_variance16x32_c), + VarianceParams(4, 4, &aom_variance16x16_c), + VarianceParams(4, 3, &aom_variance16x8_c), + VarianceParams(3, 4, &aom_variance8x16_c), + VarianceParams(3, 3, &aom_variance8x8_c), + VarianceParams(3, 2, &aom_variance8x4_c), + VarianceParams(2, 3, &aom_variance4x8_c), + VarianceParams(2, 2, &aom_variance4x4_c), +#if !CONFIG_REALTIME_ONLY + VarianceParams(6, 4, &aom_variance64x16_c), + VarianceParams(4, 6, &aom_variance16x64_c), + VarianceParams(5, 3, &aom_variance32x8_c), + VarianceParams(3, 5, &aom_variance8x32_c), + VarianceParams(4, 2, &aom_variance16x4_c), + VarianceParams(2, 4, &aom_variance4x16_c), +#endif +}; +INSTANTIATE_TEST_SUITE_P(C, AvxVarianceTest, + ::testing::ValuesIn(kArrayVariance_c)); + +typedef TestParams<GetSseSum8x8QuadFunc> GetSseSumParams; +const GetSseSumParams kArrayGetSseSum8x8Quad_c[] = { + GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_c, 0), + GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_c, 0), + GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_c, 0), + GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_c, 0) +}; +INSTANTIATE_TEST_SUITE_P(C, GetSseSum8x8QuadTest, + ::testing::ValuesIn(kArrayGetSseSum8x8Quad_c)); + +typedef TestParams<GetSseSum16x16DualFunc> GetSseSumParamsDual; +const GetSseSumParamsDual kArrayGetSseSum16x16Dual_c[] = { + GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_c, 0), + GetSseSumParamsDual(6, 
6, &aom_get_var_sse_sum_16x16_dual_c, 0), + GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_c, 0), + GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_c, 0) +}; + +INSTANTIATE_TEST_SUITE_P(C, GetSseSum16x16DualTest, + ::testing::ValuesIn(kArrayGetSseSum16x16Dual_c)); + +typedef TestParams<SubpixVarMxNFunc> SubpelVarianceParams; +const SubpelVarianceParams kArraySubpelVariance_c[] = { + SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_c, 0), + SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_c, 0), + SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_c, 0), + SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_c, 0), + SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_c, 0), + SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_c, 0), + SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_c, 0), + SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_c, 0), + SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_c, 0), + SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_c, 0), + SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_c, 0), + SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_c, 0), + SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_c, 0), + SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_c, 0), + SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_c, 0), + SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0), +#if !CONFIG_REALTIME_ONLY + SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_c, 0), + SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_c, 0), + SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_c, 0), + SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_c, 0), + SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_c, 0), + SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_c, 0), +#endif +}; +INSTANTIATE_TEST_SUITE_P(C, AvxSubpelVarianceTest, + ::testing::ValuesIn(kArraySubpelVariance_c)); + +typedef 
TestParams<SubpixAvgVarMxNFunc> SubpelAvgVarianceParams; +const SubpelAvgVarianceParams kArraySubpelAvgVariance_c[] = { + SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_c, 0), + SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_c, 0), + SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_c, 0), + SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0), + SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0), + SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0), + SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0), + SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0), + SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0), + SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0), + SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0), + SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0), + SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0), + SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0), + SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0), + SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0), +#if !CONFIG_REALTIME_ONLY + SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_c, 0), + SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_c, 0), + SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_c, 0), + SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_c, 0), + SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_c, 0), + SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_c, 0), +#endif +}; +INSTANTIATE_TEST_SUITE_P(C, AvxSubpelAvgVarianceTest, + ::testing::ValuesIn(kArraySubpelAvgVariance_c)); + +typedef TestParams<DistWtdSubpixAvgVarMxNFunc> DistWtdSubpelAvgVarianceParams; +const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_c[] = { + 
DistWtdSubpelAvgVarianceParams( + 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_c, 0), + DistWtdSubpelAvgVarianceParams( + 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_c, 0), + DistWtdSubpelAvgVarianceParams( + 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_c, 0), + DistWtdSubpelAvgVarianceParams( + 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_c, 0), + DistWtdSubpelAvgVarianceParams( + 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_c, 0), + DistWtdSubpelAvgVarianceParams( + 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_c, 0), + DistWtdSubpelAvgVarianceParams( + 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_c, 0), + DistWtdSubpelAvgVarianceParams(4, 3, + &aom_dist_wtd_sub_pixel_avg_variance16x8_c, 0), + DistWtdSubpelAvgVarianceParams(3, 4, + &aom_dist_wtd_sub_pixel_avg_variance8x16_c, 0), + DistWtdSubpelAvgVarianceParams(3, 3, + &aom_dist_wtd_sub_pixel_avg_variance8x8_c, 0), + DistWtdSubpelAvgVarianceParams(3, 2, + &aom_dist_wtd_sub_pixel_avg_variance8x4_c, 0), + DistWtdSubpelAvgVarianceParams(2, 3, + &aom_dist_wtd_sub_pixel_avg_variance4x8_c, 0), + DistWtdSubpelAvgVarianceParams(2, 2, + &aom_dist_wtd_sub_pixel_avg_variance4x4_c, 0), +#if !CONFIG_REALTIME_ONLY + + DistWtdSubpelAvgVarianceParams( + 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_c, 0), + DistWtdSubpelAvgVarianceParams( + 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_c, 0), + DistWtdSubpelAvgVarianceParams(5, 3, + &aom_dist_wtd_sub_pixel_avg_variance32x8_c, 0), + DistWtdSubpelAvgVarianceParams(3, 5, + &aom_dist_wtd_sub_pixel_avg_variance8x32_c, 0), + DistWtdSubpelAvgVarianceParams(4, 2, + &aom_dist_wtd_sub_pixel_avg_variance16x4_c, 0), + DistWtdSubpelAvgVarianceParams(2, 4, + &aom_dist_wtd_sub_pixel_avg_variance4x16_c, 0), +#endif +}; +INSTANTIATE_TEST_SUITE_P(C, AvxDistWtdSubpelAvgVarianceTest, + ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_c)); + +#if !CONFIG_REALTIME_ONLY +INSTANTIATE_TEST_SUITE_P( + C, AvxObmcSubpelVarianceTest, + ::testing::Values( + 
ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_c, + 0), + ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_c, 0), + ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_c, 0), + ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_c, 0), + ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_c, 0), + ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_c, 0), + ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_c, 0), + ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_c, 0), + ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_c, 0), + ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_c, 0), + ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_c, 0), + ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_c, 0), + ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_c, 0), + ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_c, 0), + ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_c, 0), + ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0), + + ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_c, 0), + ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_c, 0), + ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_c, 0), + ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_c, 0), + ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_c, 0), + ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_c, 0))); +#endif + +#if CONFIG_AV1_HIGHBITDEPTH +typedef uint64_t (*MseHBDWxH16bitFunc)(uint16_t *dst, int dstride, + uint16_t *src, int sstride, int w, + int h); + +template <typename FunctionType> +class MseHBDWxHTestClass + : public ::testing::TestWithParam<TestParams<FunctionType> > { + public: + void SetUp() override { + params_ = this->GetParam(); + + 
rnd_.Reset(ACMRandom::DeterministicSeed()); + src_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, block_size() * sizeof(src_))); + dst_ = reinterpret_cast<uint16_t *>( + aom_memalign(16, block_size() * sizeof(dst_))); + ASSERT_NE(src_, nullptr); + ASSERT_NE(dst_, nullptr); + } + + void TearDown() override { + aom_free(src_); + aom_free(dst_); + src_ = nullptr; + dst_ = nullptr; + } + + protected: + void RefMatchTestMse(); + void SpeedTest(); + + protected: + ACMRandom rnd_; + uint16_t *dst_; + uint16_t *src_; + TestParams<FunctionType> params_; + + // some relay helpers + int block_size() const { return params_.block_size; } + int width() const { return params_.width; } + int d_stride() const { return params_.width; } // stride is same as width + int s_stride() const { return params_.width; } // stride is same as width + int height() const { return params_.height; } + int mask() const { return params_.mask; } +}; + +template <typename MseHBDWxHFunctionType> +void MseHBDWxHTestClass<MseHBDWxHFunctionType>::SpeedTest() { + aom_usec_timer ref_timer, test_timer; + double elapsed_time_c = 0; + double elapsed_time_simd = 0; + int run_time = 10000000; + int w = width(); + int h = height(); + int dstride = d_stride(); + int sstride = s_stride(); + for (int k = 0; k < block_size(); ++k) { + dst_[k] = rnd_.Rand16() & mask(); + src_[k] = rnd_.Rand16() & mask(); + } + aom_usec_timer_start(&ref_timer); + for (int i = 0; i < run_time; i++) { + aom_mse_wxh_16bit_highbd_c(dst_, dstride, src_, sstride, w, h); + } + aom_usec_timer_mark(&ref_timer); + elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer)); + + aom_usec_timer_start(&test_timer); + for (int i = 0; i < run_time; i++) { + params_.func(dst_, dstride, src_, sstride, w, h); + } + aom_usec_timer_mark(&test_timer); + elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer)); + + printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(), + elapsed_time_c, 
elapsed_time_simd, + (elapsed_time_c / elapsed_time_simd)); +} + +template <typename MseHBDWxHFunctionType> +void MseHBDWxHTestClass<MseHBDWxHFunctionType>::RefMatchTestMse() { + uint64_t mse_ref = 0; + uint64_t mse_mod = 0; + int w = width(); + int h = height(); + int dstride = d_stride(); + int sstride = s_stride(); + for (int i = 0; i < 10; i++) { + for (int k = 0; k < block_size(); ++k) { + dst_[k] = rnd_.Rand16() & mask(); + src_[k] = rnd_.Rand16() & mask(); + } + API_REGISTER_STATE_CHECK(mse_ref = aom_mse_wxh_16bit_highbd_c( + dst_, dstride, src_, sstride, w, h)); + API_REGISTER_STATE_CHECK( + mse_mod = params_.func(dst_, dstride, src_, sstride, w, h)); + EXPECT_EQ(mse_ref, mse_mod) + << "ref mse: " << mse_ref << " mod mse: " << mse_mod; + } +} + +typedef TestParams<MseHBDWxH16bitFunc> MseHBDWxHParams; +typedef MseHBDWxHTestClass<MseHBDWxH16bitFunc> MseHBDWxHTest; +typedef MainTestClass<VarianceMxNFunc> AvxHBDMseTest; +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDMseTest); +typedef MainTestClass<VarianceMxNFunc> AvxHBDVarianceTest; +typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxHBDSubpelVarianceTest; +typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxHBDSubpelAvgVarianceTest; +typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc> + AvxHBDDistWtdSubpelAvgVarianceTest; +#if !CONFIG_REALTIME_ONLY +typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxHBDObmcSubpelVarianceTest; +#endif +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDObmcSubpelVarianceTest); + +TEST_P(MseHBDWxHTest, RefMse) { RefMatchTestMse(); } +TEST_P(MseHBDWxHTest, DISABLED_SpeedMse) { SpeedTest(); } +TEST_P(AvxHBDMseTest, RefMse) { RefTestMse(); } +TEST_P(AvxHBDMseTest, MaxMse) { MaxTestMse(); } +TEST_P(AvxHBDMseTest, DISABLED_SpeedMse) { SpeedTest(); } +TEST_P(AvxHBDVarianceTest, Zero) { ZeroTest(); } +TEST_P(AvxHBDVarianceTest, Ref) { RefTest(); } +TEST_P(AvxHBDVarianceTest, RefStride) { RefStrideTest(); } +TEST_P(AvxHBDVarianceTest, OneQuarter) { OneQuarterTest(); } 
+TEST_P(AvxHBDVarianceTest, DISABLED_Speed) { SpeedTest(); }
+TEST_P(AvxHBDSubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(AvxHBDSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
+TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxHBDDistWtdSubpelAvgVarianceTest, Ref) { RefTest(); }
+#if !CONFIG_REALTIME_ONLY
+TEST_P(AvxHBDObmcSubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxHBDObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(AvxHBDObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
+#endif
+
+// C instantiation of the WxH 16-bit MSE test: the same function is passed
+// with four different leading argument pairs and a last argument of 10.
+// NOTE(review): the leading pair is presumably log2 width/height, as in the
+// size-named tables in this file - confirm against TestParams.
+INSTANTIATE_TEST_SUITE_P(
+    C, MseHBDWxHTest,
+    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_c, 10),
+                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_c, 10),
+                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_c, 10),
+                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_c, 10)));
+
+// C high-bitdepth MSE functions for 16x16, 16x8, 8x16 and 8x8. The last
+// argument matches the bit depth (12, 10 or 8) in the function name.
+INSTANTIATE_TEST_SUITE_P(
+    C, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_c, 12),
+                      MseParams(4, 3, &aom_highbd_12_mse16x8_c, 12),
+                      MseParams(3, 4, &aom_highbd_12_mse8x16_c, 12),
+                      MseParams(3, 3, &aom_highbd_12_mse8x8_c, 12),
+                      MseParams(4, 4, &aom_highbd_10_mse16x16_c, 10),
+                      MseParams(4, 3, &aom_highbd_10_mse16x8_c, 10),
+                      MseParams(3, 4, &aom_highbd_10_mse8x16_c, 10),
+                      MseParams(3, 3, &aom_highbd_10_mse8x8_c, 10),
+                      MseParams(4, 4, &aom_highbd_8_mse16x16_c, 8),
+                      MseParams(4, 3, &aom_highbd_8_mse16x8_c, 8),
+                      MseParams(3, 4, &aom_highbd_8_mse8x16_c, 8),
+                      MseParams(3, 3, &aom_highbd_8_mse8x8_c, 8)));
+
+// NEON counterparts of the two C instantiations above.
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+    NEON, MseHBDWxHTest,
+    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_neon, 10),
+                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_neon, 10),
+                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_neon, 10),
+                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_neon,
+                                      10)));
+
+INSTANTIATE_TEST_SUITE_P(
+    NEON, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_neon, 12),
+                      MseParams(4, 3, &aom_highbd_12_mse16x8_neon, 12),
+                      MseParams(3, 4, &aom_highbd_12_mse8x16_neon, 12),
+                      MseParams(3, 3, &aom_highbd_12_mse8x8_neon, 12),
+                      MseParams(4, 4, &aom_highbd_10_mse16x16_neon, 10),
+                      MseParams(4, 3, &aom_highbd_10_mse16x8_neon, 10),
+                      MseParams(3, 4, &aom_highbd_10_mse8x16_neon, 10),
+                      MseParams(3, 3, &aom_highbd_10_mse8x8_neon, 10),
+                      MseParams(4, 4, &aom_highbd_8_mse16x16_neon, 8),
+                      MseParams(4, 3, &aom_highbd_8_mse16x8_neon, 8),
+                      MseParams(3, 4, &aom_highbd_8_mse8x16_neon, 8),
+                      MseParams(3, 3, &aom_highbd_8_mse8x8_neon, 8)));
+#endif  // HAVE_NEON
+
+// Only the highbd_8 MSE functions are listed for NEON_DOTPROD.
+#if HAVE_NEON_DOTPROD
+INSTANTIATE_TEST_SUITE_P(
+    NEON_DOTPROD, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_8_mse16x16_neon_dotprod, 8),
+                      MseParams(4, 3, &aom_highbd_8_mse16x8_neon_dotprod, 8),
+                      MseParams(3, 4, &aom_highbd_8_mse8x16_neon_dotprod, 8),
+                      MseParams(3, 3, &aom_highbd_8_mse8x8_neon_dotprod, 8)));
+#endif  // HAVE_NEON_DOTPROD
+
+// SVE counterparts of the C MSE instantiations.
+#if HAVE_SVE
+INSTANTIATE_TEST_SUITE_P(
+    SVE, MseHBDWxHTest,
+    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sve, 10),
+                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sve, 10),
+                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sve, 10),
+                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sve,
+                                      10)));
+
+INSTANTIATE_TEST_SUITE_P(
+    SVE, AvxHBDMseTest,
+    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sve, 12),
+                      MseParams(4, 3, &aom_highbd_12_mse16x8_sve, 12),
+                      MseParams(3, 4, &aom_highbd_12_mse8x16_sve, 12),
+                      MseParams(3, 3, &aom_highbd_12_mse8x8_sve, 12),
+                      MseParams(4, 4, &aom_highbd_10_mse16x16_sve, 10),
+                      MseParams(4, 3, &aom_highbd_10_mse16x8_sve, 10),
+                      MseParams(3, 4, &aom_highbd_10_mse8x16_sve, 10),
+                      MseParams(3, 3, &aom_highbd_10_mse8x8_sve, 10),
+                      MseParams(4, 4, &aom_highbd_8_mse16x16_sve, 8),
+                      MseParams(4, 3, &aom_highbd_8_mse16x8_sve, 8),
+                      MseParams(3, 4, &aom_highbd_8_mse8x16_sve, 8),
+                      MseParams(3, 3, &aom_highbd_8_mse8x8_sve, 8)));
+#endif  // HAVE_SVE
+
+// Table of the C high-bitdepth variance functions, grouped as 12-, 10- and
+// 8-bit variants. The first two arguments equal log2 of the width and height
+// in the function name and the last argument matches the bit depth in the
+// name. The entries inside the #if are compiled only when
+// CONFIG_REALTIME_ONLY is 0.
+const VarianceParams kArrayHBDVariance_c[] = {
+  VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12),
+  VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12),
+  VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12),
+  VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12),
+  VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12),
+  VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12),
+  VarianceParams(5, 5, &aom_highbd_12_variance32x32_c, 12),
+  VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12),
+  VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12),
+  VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12),
+  VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12),
+  VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12),
+  VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12),
+  VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12),
+  VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12),
+  VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12),
+  VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10),
+  VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10),
+  VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10),
+  VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10),
+  VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10),
+  VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10),
+  VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10),
+  VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10),
+  VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10),
+  VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10),
+  VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10),
+  VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10),
+  VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10),
+  VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10),
+  VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10),
+  VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10),
+  VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8),
+  VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8),
+  VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8),
+  VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8),
+  VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8),
+  VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8),
+  VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8),
+  VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8),
+  VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8),
+  VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8),
+  VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8),
+  VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8),
+  VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8),
+  VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8),
+  VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8),
+  VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8),
+#if !CONFIG_REALTIME_ONLY
+  VarianceParams(6, 4, &aom_highbd_12_variance64x16_c, 12),
+  VarianceParams(4, 6, &aom_highbd_12_variance16x64_c, 12),
+  VarianceParams(5, 3, &aom_highbd_12_variance32x8_c, 12),
+  VarianceParams(3, 5, &aom_highbd_12_variance8x32_c, 12),
+  VarianceParams(4, 2, &aom_highbd_12_variance16x4_c, 12),
+  VarianceParams(2, 4, &aom_highbd_12_variance4x16_c, 12),
+  VarianceParams(6, 4, &aom_highbd_10_variance64x16_c, 10),
+  VarianceParams(4, 6, &aom_highbd_10_variance16x64_c, 10),
+  VarianceParams(5, 3, &aom_highbd_10_variance32x8_c, 10),
+  VarianceParams(3, 5, &aom_highbd_10_variance8x32_c, 10),
+  VarianceParams(4, 2, &aom_highbd_10_variance16x4_c, 10),
+  VarianceParams(2, 4, &aom_highbd_10_variance4x16_c, 10),
+  VarianceParams(6, 4, &aom_highbd_8_variance64x16_c, 8),
+  VarianceParams(4, 6, &aom_highbd_8_variance16x64_c, 8),
+  VarianceParams(5, 3, &aom_highbd_8_variance32x8_c, 8),
+  VarianceParams(3, 5, &aom_highbd_8_variance8x32_c, 8),
+  VarianceParams(4, 2, &aom_highbd_8_variance16x4_c, 8),
+  VarianceParams(2, 4, &aom_highbd_8_variance4x16_c, 8),
+#endif
+};
+// Runs AvxHBDVarianceTest over every entry of kArrayHBDVariance_c.
+INSTANTIATE_TEST_SUITE_P(C, AvxHBDVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDVariance_c));
+
+// The SSE4.1 instantiation lists only the 4x4 functions, one per bit depth.
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_SUITE_P(
+    SSE4_1, AvxHBDVarianceTest,
+    ::testing::Values(
+        VarianceParams(2, 2, &aom_highbd_8_variance4x4_sse4_1, 8),
+        VarianceParams(2, 2, &aom_highbd_10_variance4x4_sse4_1, 10),
+        VarianceParams(2, 2, &aom_highbd_12_variance4x4_sse4_1, 12)));
+#endif  // HAVE_SSE4_1
+
+// Table of the C high-bitdepth sub-pixel variance functions, grouped as 8-,
+// 10- and 12-bit variants. The first two arguments equal log2 of the width
+// and height in the function name and the last argument matches the bit depth
+// in the name. The entries inside the #if are compiled only when
+// CONFIG_REALTIME_ONLY is 0.
+const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = {
+  SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8),
+  SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8),
+  SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8),
+  SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8),
+  SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8),
+  SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8),
+  SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8),
+  SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8),
+  SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8),
+  SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8),
+  SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8),
+  SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_c, 8),
+  SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8),
+  SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8),
+  SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8),
+  SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8),
+  SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10),
+  SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10),
+  SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10),
+  SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10),
+  SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10),
+  SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10),
+  SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10),
+  SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10),
+  SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10),
+  SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10),
+  SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10),
+  SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10),
+  SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10),
+  SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10),
+  SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10),
+  SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10),
+  SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12),
+  SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12),
+  SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12),
+  SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12),
+  SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12),
+  SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12),
+  SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12),
+  SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_c, 12),
+  SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12),
+  SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12),
+  SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12),
+  SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12),
+  SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12),
+  SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12),
+  SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12),
+  SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12),
+#if !CONFIG_REALTIME_ONLY
+  SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_c, 8),
+  SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_c, 8),
+  SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_c, 8),
+  SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_c, 8),
+  SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_c, 8),
+  SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_c, 8),
+  SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_c, 10),
+  SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_c, 10),
+  SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_c, 10),
+  SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_c, 10),
+  SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_c, 10),
+  SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_c, 10),
+  SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_c, 12),
+  SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_c, 12),
+  SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_c, 12),
+  SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_c, 12),
+  SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_c, 12),
+  SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_c, 12),
+#endif
+};
+INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
+
+// Table of the C high-bitdepth sub-pixel average variance functions, grouped
+// as 8-, 10- and 12-bit variants; argument layout as in the tables above.
+const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
+  SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c,
+                          8),
+  SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c,
+                          8),
+  SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c,
+                          8),
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8),
+  SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8),
+  SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8),
+  SubpelAvgVarianceParams(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c,
+                          10),
+  SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c,
+                          10),
+  SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c,
+                          10),
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c,
+                          10),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c,
+                          10),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c,
+                          10),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c,
+                          10),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c,
+                          10),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c,
+                          10),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c,
+                          10),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c,
+                          10),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c,
+                          10),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10),
+  SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10),
+  SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10),
+  SubpelAvgVarianceParams(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c,
+                          12),
+  SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c,
+                          12),
+  SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c,
+                          12),
+  SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c,
+                          12),
+  SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c,
+                          12),
+  SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c,
+                          12),
+  SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_c,
+                          12),
+  SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c,
+                          12),
+  SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c,
+                          12),
+  SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c,
+                          12),
+  SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c,
+                          12),
+  SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c,
+                          12),
+  SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12),
+  SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12),
+  SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12),
+  SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12),
+
+#if !CONFIG_REALTIME_ONLY
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_c, 8),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_c, 8),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_c, 8),
+
SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_c, 8),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_c, 8),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_c, 8),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_c,
+                          10),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_c,
+                          10),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_c,
+                          10),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_c,
+                          10),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_c,
+                          10),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_c,
+                          10),
+  SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_c,
+                          12),
+  SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_c,
+                          12),
+  SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_c,
+                          12),
+  SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_c,
+                          12),
+  SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_c,
+                          12),
+  SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_c,
+                          12),
+#endif
+};
+// Runs AvxHBDSubpelAvgVarianceTest over every entry of
+// kArrayHBDSubpelAvgVariance_c.
+INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelAvgVarianceTest,
+                         ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
+
+// Table of the C high-bitdepth dist_wtd sub-pixel average variance functions,
+// grouped as 8-, 10- and 12-bit variants. The first two arguments equal log2
+// of the width and height in the function name and the last argument matches
+// the bit depth in the name.
+const DistWtdSubpelAvgVarianceParams kArrayHBDDistWtdSubpelAvgVariance_c[] = {
+  DistWtdSubpelAvgVarianceParams(
+      7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_c, 10),
+
DistWtdSubpelAvgVarianceParams(
+      4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_c, 10),
+  // 12-bit variants: the last argument matches the highbd_12 function names.
+  DistWtdSubpelAvgVarianceParams(
+      7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_c, 12),
+
+  // The remaining block sizes are compiled only when CONFIG_REALTIME_ONLY
+  // is 0.
+#if !CONFIG_REALTIME_ONLY
+  DistWtdSubpelAvgVarianceParams(
+      6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_c, 8),
+  DistWtdSubpelAvgVarianceParams(
+      6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_c, 10),
+  DistWtdSubpelAvgVarianceParams(
+      6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_c, 12),
+
DistWtdSubpelAvgVarianceParams(
+      4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_c, 12),
+  DistWtdSubpelAvgVarianceParams(
+      2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_c, 12),
+#endif
+};
+// Runs AvxHBDDistWtdSubpelAvgVarianceTest over every entry of
+// kArrayHBDDistWtdSubpelAvgVariance_c.
+INSTANTIATE_TEST_SUITE_P(
+    C, AvxHBDDistWtdSubpelAvgVarianceTest,
+    ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_c));
+
+// Table of the C high-bitdepth OBMC sub-pixel variance functions; compiled
+// only when CONFIG_REALTIME_ONLY is 0. The first two arguments equal log2 of
+// the width and height in the function name and the last argument matches the
+// bit depth in the name.
+#if !CONFIG_REALTIME_ONLY
+const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_c[] = {
+  ObmcSubpelVarianceParams(7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_c,
+                           8),
+  ObmcSubpelVarianceParams(7, 6, &aom_highbd_8_obmc_sub_pixel_variance128x64_c,
+                           8),
+  ObmcSubpelVarianceParams(6, 7, &aom_highbd_8_obmc_sub_pixel_variance64x128_c,
+                           8),
+  ObmcSubpelVarianceParams(6, 6, &aom_highbd_8_obmc_sub_pixel_variance64x64_c,
+                           8),
+  ObmcSubpelVarianceParams(6, 5, &aom_highbd_8_obmc_sub_pixel_variance64x32_c,
+                           8),
+  ObmcSubpelVarianceParams(5, 6, &aom_highbd_8_obmc_sub_pixel_variance32x64_c,
+                           8),
+  ObmcSubpelVarianceParams(5, 5, &aom_highbd_8_obmc_sub_pixel_variance32x32_c,
+                           8),
+  ObmcSubpelVarianceParams(5, 4, &aom_highbd_8_obmc_sub_pixel_variance32x16_c,
+                           8),
+  ObmcSubpelVarianceParams(4, 5, &aom_highbd_8_obmc_sub_pixel_variance16x32_c,
+                           8),
+  ObmcSubpelVarianceParams(4, 4, &aom_highbd_8_obmc_sub_pixel_variance16x16_c,
+                           8),
+  ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_c,
+                           8),
+  ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_c,
+                           8),
+  ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_c, 8),
+  ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_c, 8),
+  ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_c, 8),
+  ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_c, 8),
+  ObmcSubpelVarianceParams(7, 7,
+                           &aom_highbd_10_obmc_sub_pixel_variance128x128_c, 10),
+  ObmcSubpelVarianceParams(7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_c,
+                           10),
+  ObmcSubpelVarianceParams(6, 7,
&aom_highbd_10_obmc_sub_pixel_variance64x128_c, + 10), + ObmcSubpelVarianceParams(6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_c, + 10), + ObmcSubpelVarianceParams(6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_c, + 10), + ObmcSubpelVarianceParams(5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_c, + 10), + ObmcSubpelVarianceParams(5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_c, + 10), + ObmcSubpelVarianceParams(5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_c, + 10), + ObmcSubpelVarianceParams(4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_c, + 10), + ObmcSubpelVarianceParams(4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_c, + 10), + ObmcSubpelVarianceParams(4, 3, &aom_highbd_10_obmc_sub_pixel_variance16x8_c, + 10), + ObmcSubpelVarianceParams(3, 4, &aom_highbd_10_obmc_sub_pixel_variance8x16_c, + 10), + ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_c, + 10), + ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_c, + 10), + ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_c, + 10), + ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_c, + 10), + ObmcSubpelVarianceParams(7, 7, + &aom_highbd_12_obmc_sub_pixel_variance128x128_c, 12), + ObmcSubpelVarianceParams(7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_c, + 12), + ObmcSubpelVarianceParams(6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_c, + 12), + ObmcSubpelVarianceParams(6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_c, + 12), + ObmcSubpelVarianceParams(6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_c, + 12), + ObmcSubpelVarianceParams(5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_c, + 12), + ObmcSubpelVarianceParams(5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_c, + 12), + ObmcSubpelVarianceParams(5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_c, + 12), + ObmcSubpelVarianceParams(4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_c, + 12), + ObmcSubpelVarianceParams(4, 4, 
&aom_highbd_12_obmc_sub_pixel_variance16x16_c, + 12), + ObmcSubpelVarianceParams(4, 3, &aom_highbd_12_obmc_sub_pixel_variance16x8_c, + 12), + ObmcSubpelVarianceParams(3, 4, &aom_highbd_12_obmc_sub_pixel_variance8x16_c, + 12), + ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_c, + 12), + ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_c, + 12), + ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_c, + 12), + ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_c, + 12), + + ObmcSubpelVarianceParams(6, 4, &aom_highbd_8_obmc_sub_pixel_variance64x16_c, + 8), + ObmcSubpelVarianceParams(4, 6, &aom_highbd_8_obmc_sub_pixel_variance16x64_c, + 8), + ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_c, + 8), + ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_c, + 8), + ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_c, + 8), + ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_c, + 8), + ObmcSubpelVarianceParams(6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_c, + 10), + ObmcSubpelVarianceParams(4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_c, + 10), + ObmcSubpelVarianceParams(5, 3, &aom_highbd_10_obmc_sub_pixel_variance32x8_c, + 10), + ObmcSubpelVarianceParams(3, 5, &aom_highbd_10_obmc_sub_pixel_variance8x32_c, + 10), + ObmcSubpelVarianceParams(4, 2, &aom_highbd_10_obmc_sub_pixel_variance16x4_c, + 10), + ObmcSubpelVarianceParams(2, 4, &aom_highbd_10_obmc_sub_pixel_variance4x16_c, + 10), + ObmcSubpelVarianceParams(6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_c, + 12), + ObmcSubpelVarianceParams(4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_c, + 12), + ObmcSubpelVarianceParams(5, 3, &aom_highbd_12_obmc_sub_pixel_variance32x8_c, + 12), + ObmcSubpelVarianceParams(3, 5, &aom_highbd_12_obmc_sub_pixel_variance8x32_c, + 12), + ObmcSubpelVarianceParams(4, 2, 
&aom_highbd_12_obmc_sub_pixel_variance16x4_c, + 12), + ObmcSubpelVarianceParams(2, 4, &aom_highbd_12_obmc_sub_pixel_variance4x16_c, + 12), +}; +INSTANTIATE_TEST_SUITE_P(C, AvxHBDObmcSubpelVarianceTest, + ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_c)); +#endif // !CONFIG_REALTIME_ONLY +#endif // CONFIG_AV1_HIGHBITDEPTH + +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, MseWxHTest, + ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_sse2, 8), + MseWxHParams(3, 2, &aom_mse_wxh_16bit_sse2, 8), + MseWxHParams(2, 3, &aom_mse_wxh_16bit_sse2, 8), + MseWxHParams(2, 2, &aom_mse_wxh_16bit_sse2, 8))); + +INSTANTIATE_TEST_SUITE_P( + SSE2, Mse16xHTest, + ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_sse2, 8), + Mse16xHParams(3, 2, &aom_mse_16xh_16bit_sse2, 8), + Mse16xHParams(2, 3, &aom_mse_16xh_16bit_sse2, 8), + Mse16xHParams(2, 2, &aom_mse_16xh_16bit_sse2, 8))); + +INSTANTIATE_TEST_SUITE_P(SSE2, SumOfSquaresTest, + ::testing::Values(aom_get_mb_ss_sse2)); + +INSTANTIATE_TEST_SUITE_P(SSE2, AvxMseTest, + ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2), + MseParams(4, 3, &aom_mse16x8_sse2), + MseParams(3, 4, &aom_mse8x16_sse2), + MseParams(3, 3, &aom_mse8x8_sse2))); + +const VarianceParams kArrayVariance_sse2[] = { + VarianceParams(7, 7, &aom_variance128x128_sse2), + VarianceParams(7, 6, &aom_variance128x64_sse2), + VarianceParams(6, 7, &aom_variance64x128_sse2), + VarianceParams(6, 6, &aom_variance64x64_sse2), + VarianceParams(6, 5, &aom_variance64x32_sse2), + VarianceParams(5, 6, &aom_variance32x64_sse2), + VarianceParams(5, 5, &aom_variance32x32_sse2), + VarianceParams(5, 4, &aom_variance32x16_sse2), + VarianceParams(4, 5, &aom_variance16x32_sse2), + VarianceParams(4, 4, &aom_variance16x16_sse2), + VarianceParams(4, 3, &aom_variance16x8_sse2), + VarianceParams(3, 4, &aom_variance8x16_sse2), + VarianceParams(3, 3, &aom_variance8x8_sse2), + VarianceParams(3, 2, &aom_variance8x4_sse2), + VarianceParams(2, 3, &aom_variance4x8_sse2), + VarianceParams(2, 
2, &aom_variance4x4_sse2), +#if !CONFIG_REALTIME_ONLY + VarianceParams(6, 4, &aom_variance64x16_sse2), + VarianceParams(5, 3, &aom_variance32x8_sse2), + VarianceParams(4, 6, &aom_variance16x64_sse2), + VarianceParams(4, 2, &aom_variance16x4_sse2), + VarianceParams(3, 5, &aom_variance8x32_sse2), + VarianceParams(2, 4, &aom_variance4x16_sse2), +#endif +}; +INSTANTIATE_TEST_SUITE_P(SSE2, AvxVarianceTest, + ::testing::ValuesIn(kArrayVariance_sse2)); + +const GetSseSumParams kArrayGetSseSum8x8Quad_sse2[] = { + GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_sse2, 0), + GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_sse2, 0), + GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_sse2, 0), + GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_sse2, 0) +}; +INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum8x8QuadTest, + ::testing::ValuesIn(kArrayGetSseSum8x8Quad_sse2)); + +const GetSseSumParamsDual kArrayGetSseSum16x16Dual_sse2[] = { + GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_sse2, 0), + GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_sse2, 0), + GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_sse2, 0), + GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_sse2, 0) +}; +INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum16x16DualTest, + ::testing::ValuesIn(kArrayGetSseSum16x16Dual_sse2)); + +const SubpelVarianceParams kArraySubpelVariance_sse2[] = { + SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_sse2, 0), + SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_sse2, 0), + SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_sse2, 0), + SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_sse2, 0), + SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_sse2, 0), + SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_sse2, 0), + SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_sse2, 0), + SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_sse2, 0), + SubpelVarianceParams(4, 5, 
&aom_sub_pixel_variance16x32_sse2, 0), + SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_sse2, 0), + SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_sse2, 0), + SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_sse2, 0), + SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_sse2, 0), + SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_sse2, 0), + SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_sse2, 0), + SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_sse2, 0), +#if !CONFIG_REALTIME_ONLY + SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_sse2, 0), + SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_sse2, 0), + SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_sse2, 0), + SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_sse2, 0), + SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_sse2, 0), + SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_sse2, 0), +#endif +}; +INSTANTIATE_TEST_SUITE_P(SSE2, AvxSubpelVarianceTest, + ::testing::ValuesIn(kArraySubpelVariance_sse2)); + +const SubpelAvgVarianceParams kArraySubpelAvgVariance_sse2[] = { + SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_sse2, 0), + SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_sse2, 0), + SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_sse2, 0), + SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_sse2, 0), + SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_sse2, 0), + SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_sse2, 0), + SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_sse2, 0), + SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_sse2, 0), + SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_sse2, 0), + SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_sse2, 0), + SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_sse2, 0), + SubpelAvgVarianceParams(3, 4, 
&aom_sub_pixel_avg_variance8x16_sse2, 0), + SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_sse2, 0), + SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_sse2, 0), + SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_sse2, 0), + SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_sse2, 0), +#if !CONFIG_REALTIME_ONLY + SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_sse2, 0), + SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_sse2, 0), + SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_sse2, 0), + SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_sse2, 0), + SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_sse2, 0), + SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_sse2, 0), +#endif +}; +INSTANTIATE_TEST_SUITE_P(SSE2, AvxSubpelAvgVarianceTest, + ::testing::ValuesIn(kArraySubpelAvgVariance_sse2)); + +#if CONFIG_AV1_HIGHBITDEPTH +#if HAVE_SSE2 +INSTANTIATE_TEST_SUITE_P( + SSE2, MseHBDWxHTest, + ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sse2, 10), + MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sse2, 10), + MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sse2, 10), + MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sse2, + 10))); +#endif // HAVE_SSE2 +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AvxSubpelVarianceTest, + ::testing::Values( + SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1, + 8), + SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1, + 10), + SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1, + 12))); + +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AvxSubpelAvgVarianceTest, + ::testing::Values( + SubpelAvgVarianceParams(2, 2, + &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1, + 8), + SubpelAvgVarianceParams(2, 2, + &aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1, + 10), + SubpelAvgVarianceParams(2, 2, + 
&aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1, + 12))); +#endif // HAVE_SSE4_1 + +INSTANTIATE_TEST_SUITE_P( + SSE2, AvxHBDMseTest, + ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2, 12), + MseParams(3, 3, &aom_highbd_12_mse8x8_sse2, 12), + MseParams(4, 4, &aom_highbd_10_mse16x16_sse2, 10), + MseParams(3, 3, &aom_highbd_10_mse8x8_sse2, 10), + MseParams(4, 4, &aom_highbd_8_mse16x16_sse2, 8), + MseParams(3, 3, &aom_highbd_8_mse8x8_sse2, 8))); + +const VarianceParams kArrayHBDVariance_sse2[] = { + VarianceParams(7, 7, &aom_highbd_12_variance128x128_sse2, 12), + VarianceParams(7, 6, &aom_highbd_12_variance128x64_sse2, 12), + VarianceParams(6, 7, &aom_highbd_12_variance64x128_sse2, 12), + VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12), + VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12), + VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12), + VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12), + VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12), + VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12), + VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12), + VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12), + VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12), + VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12), + VarianceParams(7, 7, &aom_highbd_10_variance128x128_sse2, 10), + VarianceParams(7, 6, &aom_highbd_10_variance128x64_sse2, 10), + VarianceParams(6, 7, &aom_highbd_10_variance64x128_sse2, 10), + VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10), + VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10), + VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10), + VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10), + VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10), + VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10), + VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10), + VarianceParams(4, 3, 
&aom_highbd_10_variance16x8_sse2, 10), + VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10), + VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10), + VarianceParams(7, 7, &aom_highbd_8_variance128x128_sse2, 8), + VarianceParams(7, 6, &aom_highbd_8_variance128x64_sse2, 8), + VarianceParams(6, 7, &aom_highbd_8_variance64x128_sse2, 8), + VarianceParams(6, 6, &aom_highbd_8_variance64x64_sse2, 8), + VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8), + VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8), + VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8), + VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8), + VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8), + VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8), + VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8), + VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8), + VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8), +#if !CONFIG_REALTIME_ONLY + VarianceParams(6, 4, &aom_highbd_12_variance64x16_sse2, 12), + VarianceParams(4, 6, &aom_highbd_12_variance16x64_sse2, 12), + VarianceParams(5, 3, &aom_highbd_12_variance32x8_sse2, 12), + VarianceParams(3, 5, &aom_highbd_12_variance8x32_sse2, 12), + // VarianceParams(4, 2, &aom_highbd_12_variance16x4_sse2, 12), + // VarianceParams(2, 4, &aom_highbd_12_variance4x16_sse2, 12), + VarianceParams(6, 4, &aom_highbd_10_variance64x16_sse2, 10), + VarianceParams(4, 6, &aom_highbd_10_variance16x64_sse2, 10), + VarianceParams(5, 3, &aom_highbd_10_variance32x8_sse2, 10), + VarianceParams(3, 5, &aom_highbd_10_variance8x32_sse2, 10), + // VarianceParams(4, 2, &aom_highbd_10_variance16x4_sse2, 10), + // VarianceParams(2, 4, &aom_highbd_10_variance4x16_sse2, 10), + VarianceParams(6, 4, &aom_highbd_8_variance64x16_sse2, 8), + VarianceParams(4, 6, &aom_highbd_8_variance16x64_sse2, 8), + VarianceParams(5, 3, &aom_highbd_8_variance32x8_sse2, 8), + VarianceParams(3, 5, &aom_highbd_8_variance8x32_sse2, 8), +// 
VarianceParams(4, 2, &aom_highbd_8_variance16x4_sse2, 8), +// VarianceParams(2, 4, &aom_highbd_8_variance4x16_sse2, 8), +#endif +}; +INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDVarianceTest, + ::testing::ValuesIn(kArrayHBDVariance_sse2)); + +#if HAVE_AVX2 + +INSTANTIATE_TEST_SUITE_P( + AVX2, MseHBDWxHTest, + ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_avx2, 10), + MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_avx2, 10), + MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_avx2, 10), + MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_avx2, + 10))); + +const VarianceParams kArrayHBDVariance_avx2[] = { + VarianceParams(7, 7, &aom_highbd_10_variance128x128_avx2, 10), + VarianceParams(7, 6, &aom_highbd_10_variance128x64_avx2, 10), + VarianceParams(6, 7, &aom_highbd_10_variance64x128_avx2, 10), + VarianceParams(6, 6, &aom_highbd_10_variance64x64_avx2, 10), + VarianceParams(6, 5, &aom_highbd_10_variance64x32_avx2, 10), + VarianceParams(5, 6, &aom_highbd_10_variance32x64_avx2, 10), + VarianceParams(5, 5, &aom_highbd_10_variance32x32_avx2, 10), + VarianceParams(5, 4, &aom_highbd_10_variance32x16_avx2, 10), + VarianceParams(4, 5, &aom_highbd_10_variance16x32_avx2, 10), + VarianceParams(4, 4, &aom_highbd_10_variance16x16_avx2, 10), + VarianceParams(4, 3, &aom_highbd_10_variance16x8_avx2, 10), + VarianceParams(3, 4, &aom_highbd_10_variance8x16_avx2, 10), + VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10), +#if !CONFIG_REALTIME_ONLY + VarianceParams(6, 4, &aom_highbd_10_variance64x16_avx2, 10), + VarianceParams(5, 3, &aom_highbd_10_variance32x8_avx2, 10), + VarianceParams(4, 6, &aom_highbd_10_variance16x64_avx2, 10), + VarianceParams(3, 5, &aom_highbd_10_variance8x32_avx2, 10), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDVarianceTest, + ::testing::ValuesIn(kArrayHBDVariance_avx2)); + +const SubpelVarianceParams kArrayHBDSubpelVariance_avx2[] = { + SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_avx2, 10), + 
SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_avx2, 10), + SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_avx2, 10), + SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_avx2, 10), + SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_avx2, 10), + SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_avx2, 10), + SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_avx2, 10), + SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_avx2, 10), + SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_avx2, 10), + SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_avx2, 10), + SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_avx2, 10), + SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_avx2, 10), + SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_avx2, 10), +}; + +INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDSubpelVarianceTest, + ::testing::ValuesIn(kArrayHBDSubpelVariance_avx2)); +#endif // HAVE_AVX2 + +const SubpelVarianceParams kArrayHBDSubpelVariance_sse2[] = { + SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_sse2, 12), + SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_sse2, 12), + SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_sse2, 12), + SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_sse2, 12), + SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12), + SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12), + SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12), + SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12), + SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12), + SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12), + SubpelVarianceParams(4, 3, 
&aom_highbd_12_sub_pixel_variance16x8_sse2, 12), + SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12), + SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12), + SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12), + SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_sse2, 10), + SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_sse2, 10), + SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_sse2, 10), + SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10), + SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10), + SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10), + SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10), + SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10), + SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10), + SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10), + SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10), + SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10), + SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10), + SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10), + SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_sse2, 8), + SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_sse2, 8), + SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_sse2, 8), + SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8), + SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8), + SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8), + SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8), + SubpelVarianceParams(5, 4, 
&aom_highbd_8_sub_pixel_variance32x16_sse2, 8), + SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8), + SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8), + SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8), + SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8), + SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8), + SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8), +#if !CONFIG_REALTIME_ONLY + SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_sse2, 12), + SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12), + SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12), + SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12), + SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12), + // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12), + SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10), + SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10), + SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10), + SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10), + SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10), + // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10), + SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_sse2, 8), + SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_sse2, 8), + SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8), + SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8), + SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8), +// SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8), +#endif +}; +INSTANTIATE_TEST_SUITE_P(SSE2, 
AvxHBDSubpelVarianceTest, + ::testing::ValuesIn(kArrayHBDSubpelVariance_sse2)); + +const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_sse2[] = { + SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2, + 12), + SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2, + 12), + SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2, + 12), + SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2, + 12), + SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2, + 12), + SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2, + 12), + SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2, + 12), + SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2, + 12), + SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2, + 12), + SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2, + 12), + SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2, + 12), + SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2, + 10), + SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2, + 10), + SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_sse2, + 10), + SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2, + 10), + SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2, + 10), + SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2, + 10), + SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2, + 10), + SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2, + 10), + SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2, + 10), + SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2, + 10), + 
SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2, + 10), + SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2, + 8), + SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2, + 8), + SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2, + 8), + SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2, + 8), + SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2, + 8), + SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2, + 8), + SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2, + 8), + SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2, + 8), + SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2, + 8), + SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2, + 8), + SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2, + 8), + +#if !CONFIG_REALTIME_ONLY + SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_sse2, + 12), + SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_sse2, + 12), + SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_sse2, + 12), + SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2, + 12), + SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2, + 12), + // SubpelAvgVarianceParams(2, 4, + // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12), + SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2, + 10), + SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_sse2, + 10), + SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_sse2, + 10), + SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2, + 10), + SubpelAvgVarianceParams(4, 2, 
&aom_highbd_10_sub_pixel_avg_variance16x4_sse2, + 10), + // SubpelAvgVarianceParams(2, 4, + // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10), + SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2, + 8), + SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_sse2, + 8), + SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_sse2, + 8), + SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2, + 8), + SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2, + 8), +// SubpelAvgVarianceParams(2, 4, +// &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelAvgVarianceTest, + ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_sse2)); +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 +const SubpelVarianceParams kArraySubpelVariance_ssse3[] = { + SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_ssse3, 0), + SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_ssse3, 0), + SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_ssse3, 0), + SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0), + SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0), + SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0), + SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0), + SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0), + SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0), + SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0), + SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_ssse3, 0), + SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0), + SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0), + SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0), + SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0), + SubpelVarianceParams(2, 2, 
&aom_sub_pixel_variance4x4_ssse3, 0), +#if !CONFIG_REALTIME_ONLY + SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_ssse3, 0), + SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_ssse3, 0), + SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_ssse3, 0), + SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_ssse3, 0), + SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_ssse3, 0), + SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_ssse3, 0), +#endif +}; +INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelVarianceTest, + ::testing::ValuesIn(kArraySubpelVariance_ssse3)); + +const SubpelAvgVarianceParams kArraySubpelAvgVariance_ssse3[] = { + SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_ssse3, 0), + SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_ssse3, 0), + SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_ssse3, 0), + SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3, 0), + SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3, 0), + SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3, 0), + SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3, 0), + SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3, 0), + SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3, 0), + SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3, 0), + SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0), + SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0), + SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0), + SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0), + SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0), + SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0), +#if !CONFIG_REALTIME_ONLY + SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_ssse3, 0), + 
SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_ssse3, 0), + SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_ssse3, 0), + SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_ssse3, 0), + SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_ssse3, 0), + SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_ssse3, 0), +#endif +}; +INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelAvgVarianceTest, + ::testing::ValuesIn(kArraySubpelAvgVariance_ssse3)); + +const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_ssse3[] = { + DistWtdSubpelAvgVarianceParams( + 7, 7, &aom_dist_wtd_sub_pixel_avg_variance128x128_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 7, 6, &aom_dist_wtd_sub_pixel_avg_variance128x64_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 6, 7, &aom_dist_wtd_sub_pixel_avg_variance64x128_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_ssse3, 
0), + DistWtdSubpelAvgVarianceParams( + 2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_ssse3, 0), +#if !CONFIG_REALTIME_ONLY + DistWtdSubpelAvgVarianceParams( + 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_ssse3, 0), + DistWtdSubpelAvgVarianceParams( + 2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_ssse3, 0), +#endif +}; +INSTANTIATE_TEST_SUITE_P( + SSSE3, AvxDistWtdSubpelAvgVarianceTest, + ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_ssse3)); +#endif // HAVE_SSSE3 + +#if HAVE_SSE4_1 +#if !CONFIG_REALTIME_ONLY +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AvxObmcSubpelVarianceTest, + ::testing::Values( + ObmcSubpelVarianceParams(7, 7, + &aom_obmc_sub_pixel_variance128x128_sse4_1, 0), + ObmcSubpelVarianceParams(7, 6, + &aom_obmc_sub_pixel_variance128x64_sse4_1, 0), + ObmcSubpelVarianceParams(6, 7, + &aom_obmc_sub_pixel_variance64x128_sse4_1, 0), + ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_sse4_1, + 0), + ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_sse4_1, + 0), + ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_sse4_1, + 0), + ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_sse4_1, + 0), + ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_sse4_1, + 0), + ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_sse4_1, + 0), + ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_sse4_1, + 0), + ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_sse4_1, + 0), + ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_sse4_1, + 0), + ObmcSubpelVarianceParams(3, 3, 
&aom_obmc_sub_pixel_variance8x8_sse4_1, + 0), + ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_sse4_1, + 0), + ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_sse4_1, + 0), + ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_sse4_1, + 0), + ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_sse4_1, + 0), + ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_sse4_1, + 0), + ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_sse4_1, + 0), + ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_sse4_1, + 0), + ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_sse4_1, + 0), + ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_sse4_1, + 0))); +#endif +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 + +INSTANTIATE_TEST_SUITE_P( + AVX2, MseWxHTest, + ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_avx2, 8), + MseWxHParams(3, 2, &aom_mse_wxh_16bit_avx2, 8), + MseWxHParams(2, 3, &aom_mse_wxh_16bit_avx2, 8), + MseWxHParams(2, 2, &aom_mse_wxh_16bit_avx2, 8))); + +INSTANTIATE_TEST_SUITE_P( + AVX2, Mse16xHTest, + ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_avx2, 8), + Mse16xHParams(3, 2, &aom_mse_16xh_16bit_avx2, 8), + Mse16xHParams(2, 3, &aom_mse_16xh_16bit_avx2, 8), + Mse16xHParams(2, 2, &aom_mse_16xh_16bit_avx2, 8))); + +INSTANTIATE_TEST_SUITE_P(AVX2, AvxMseTest, + ::testing::Values(MseParams(4, 4, + &aom_mse16x16_avx2))); + +const VarianceParams kArrayVariance_avx2[] = { + VarianceParams(7, 7, &aom_variance128x128_avx2), + VarianceParams(7, 6, &aom_variance128x64_avx2), + VarianceParams(6, 7, &aom_variance64x128_avx2), + VarianceParams(6, 6, &aom_variance64x64_avx2), + VarianceParams(6, 5, &aom_variance64x32_avx2), + VarianceParams(5, 6, &aom_variance32x64_avx2), + VarianceParams(5, 5, &aom_variance32x32_avx2), + VarianceParams(5, 4, &aom_variance32x16_avx2), + VarianceParams(4, 5, &aom_variance16x32_avx2), + VarianceParams(4, 4, 
&aom_variance16x16_avx2), + VarianceParams(4, 3, &aom_variance16x8_avx2), +#if !CONFIG_REALTIME_ONLY + VarianceParams(6, 4, &aom_variance64x16_avx2), + VarianceParams(4, 6, &aom_variance16x64_avx2), + VarianceParams(5, 3, &aom_variance32x8_avx2), + VarianceParams(4, 2, &aom_variance16x4_avx2), +#endif +}; +INSTANTIATE_TEST_SUITE_P(AVX2, AvxVarianceTest, + ::testing::ValuesIn(kArrayVariance_avx2)); + +const GetSseSumParams kArrayGetSseSum8x8Quad_avx2[] = { + GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_avx2, 0), + GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_avx2, 0), + GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_avx2, 0), + GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_avx2, 0) +}; +INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum8x8QuadTest, + ::testing::ValuesIn(kArrayGetSseSum8x8Quad_avx2)); + +const GetSseSumParamsDual kArrayGetSseSum16x16Dual_avx2[] = { + GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_avx2, 0), + GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_avx2, 0), + GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_avx2, 0), + GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_avx2, 0) +}; +INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum16x16DualTest, + ::testing::ValuesIn(kArrayGetSseSum16x16Dual_avx2)); + +const SubpelVarianceParams kArraySubpelVariance_avx2[] = { + SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_avx2, 0), + SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_avx2, 0), + SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_avx2, 0), + SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_avx2, 0), + SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_avx2, 0), + SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_avx2, 0), + SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0), + SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_avx2, 0), + + SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_avx2, 0), + SubpelVarianceParams(4, 4, 
&aom_sub_pixel_variance16x16_avx2, 0), + SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_avx2, 0), +#if !CONFIG_REALTIME_ONLY + SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_avx2, 0), + SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_avx2, 0), +#endif +}; +INSTANTIATE_TEST_SUITE_P(AVX2, AvxSubpelVarianceTest, + ::testing::ValuesIn(kArraySubpelVariance_avx2)); + +INSTANTIATE_TEST_SUITE_P( + AVX2, AvxSubpelAvgVarianceTest, + ::testing::Values( + SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_avx2, + 0), + SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_avx2, + 0), + SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_avx2, + 0), + SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0), + SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_avx2, 0), + SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0), + SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0), + SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2, + 0))); +#endif // HAVE_AVX2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, MseWxHTest, + ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_neon, 8), + MseWxHParams(3, 2, &aom_mse_wxh_16bit_neon, 8), + MseWxHParams(2, 3, &aom_mse_wxh_16bit_neon, 8), + MseWxHParams(2, 2, &aom_mse_wxh_16bit_neon, 8))); + +INSTANTIATE_TEST_SUITE_P( + NEON, Mse16xHTest, + ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_neon, 8), + Mse16xHParams(3, 2, &aom_mse_16xh_16bit_neon, 8), + Mse16xHParams(2, 3, &aom_mse_16xh_16bit_neon, 8), + Mse16xHParams(2, 2, &aom_mse_16xh_16bit_neon, 8))); + +INSTANTIATE_TEST_SUITE_P(NEON, SumOfSquaresTest, + ::testing::Values(aom_get_mb_ss_neon)); + +INSTANTIATE_TEST_SUITE_P(NEON, AvxMseTest, + ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon), + MseParams(3, 4, &aom_mse8x16_neon), + MseParams(4, 4, &aom_mse16x16_neon), + MseParams(4, 3, &aom_mse16x8_neon))); + +const 
VarianceParams kArrayVariance_neon[] = { + // The first two arguments track log2 of the block width/height named in the + // kernel (e.g. 7, 6 -> 128x64). Each block size is listed exactly once. + VarianceParams(7, 7, &aom_variance128x128_neon), + VarianceParams(7, 6, &aom_variance128x64_neon), + VarianceParams(6, 7, &aom_variance64x128_neon), + VarianceParams(6, 6, &aom_variance64x64_neon), + VarianceParams(6, 5, &aom_variance64x32_neon), + VarianceParams(5, 6, &aom_variance32x64_neon), + VarianceParams(5, 5, &aom_variance32x32_neon), + VarianceParams(5, 4, &aom_variance32x16_neon), + VarianceParams(4, 5, &aom_variance16x32_neon), + VarianceParams(4, 4, &aom_variance16x16_neon), + VarianceParams(4, 3, &aom_variance16x8_neon), + VarianceParams(3, 4, &aom_variance8x16_neon), + VarianceParams(3, 3, &aom_variance8x8_neon), + VarianceParams(3, 2, &aom_variance8x4_neon), + VarianceParams(2, 3, &aom_variance4x8_neon), + VarianceParams(2, 2, &aom_variance4x4_neon), +#if !CONFIG_REALTIME_ONLY + VarianceParams(2, 4, &aom_variance4x16_neon), + VarianceParams(4, 2, &aom_variance16x4_neon), + VarianceParams(3, 5, &aom_variance8x32_neon), + VarianceParams(5, 3, &aom_variance32x8_neon), + VarianceParams(4, 6, &aom_variance16x64_neon), + VarianceParams(6, 4, &aom_variance64x16_neon), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(NEON, AvxVarianceTest, + ::testing::ValuesIn(kArrayVariance_neon)); + +const SubpelVarianceParams kArraySubpelVariance_neon[] = { + SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_neon, 0), + SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_neon, 0), + SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_neon, 0), + SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_neon, 0), + SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_neon, 0), + SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_neon, 0), + SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_neon, 0), + SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_neon, 0), + SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_neon, 0), + SubpelVarianceParams(4, 4, 
&aom_sub_pixel_variance16x16_neon, 0), + SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_neon, 0), + SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_neon, 0), + SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_neon, 0), + SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_neon, 0), + SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_neon, 0), + SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_neon, 0), +#if !CONFIG_REALTIME_ONLY + SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_neon, 0), + SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_neon, 0), + SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_neon, 0), + SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_neon, 0), + SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_neon, 0), + SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_neon, 0), +#endif +}; +INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelVarianceTest, + ::testing::ValuesIn(kArraySubpelVariance_neon)); + +const SubpelAvgVarianceParams kArraySubpelAvgVariance_neon[] = { + SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_neon, 0), + SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_neon, 0), + SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_neon, 0), + SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_neon, 0), + SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_neon, 0), + SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_neon, 0), + SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_neon, 0), + SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_neon, 0), + SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_neon, 0), + SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_neon, 0), + SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_neon, 0), + SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_neon, 0), + SubpelAvgVarianceParams(3, 3, 
&aom_sub_pixel_avg_variance8x8_neon, 0), + SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_neon, 0), + SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_neon, 0), + SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_neon, 0), +#if !CONFIG_REALTIME_ONLY + SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_neon, 0), + SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_neon, 0), + SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_neon, 0), + SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_neon, 0), + SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_neon, 0), + SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_neon, 0), +#endif +}; +INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelAvgVarianceTest, + ::testing::ValuesIn(kArraySubpelAvgVariance_neon)); + +const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_neon[] = { + DistWtdSubpelAvgVarianceParams( + 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_neon, 0), + DistWtdSubpelAvgVarianceParams( + 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_neon, 0), + DistWtdSubpelAvgVarianceParams( + 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_neon, 0), + DistWtdSubpelAvgVarianceParams( + 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_neon, 0), + DistWtdSubpelAvgVarianceParams( + 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_neon, 0), + DistWtdSubpelAvgVarianceParams( + 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_neon, 0), + DistWtdSubpelAvgVarianceParams( + 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_neon, 0), + DistWtdSubpelAvgVarianceParams( + 4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_neon, 0), + DistWtdSubpelAvgVarianceParams( + 3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_neon, 0), + DistWtdSubpelAvgVarianceParams( + 3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_neon, 0), + DistWtdSubpelAvgVarianceParams( + 3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_neon, 0), + 
DistWtdSubpelAvgVarianceParams( + 2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_neon, 0), + DistWtdSubpelAvgVarianceParams( + 2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_neon, 0), +#if !CONFIG_REALTIME_ONLY + DistWtdSubpelAvgVarianceParams( + 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_neon, 0), + DistWtdSubpelAvgVarianceParams( + 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_neon, 0), + DistWtdSubpelAvgVarianceParams( + 5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_neon, 0), + DistWtdSubpelAvgVarianceParams( + 3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_neon, 0), + DistWtdSubpelAvgVarianceParams( + 4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_neon, 0), + DistWtdSubpelAvgVarianceParams( + 2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_neon, 0), +#endif // !CONFIG_REALTIME_ONLY +}; +INSTANTIATE_TEST_SUITE_P( + NEON, AvxDistWtdSubpelAvgVarianceTest, + ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_neon)); + +#if !CONFIG_REALTIME_ONLY +const ObmcSubpelVarianceParams kArrayObmcSubpelVariance_neon[] = { + ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_neon, 0), + ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_neon, 0), + ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_neon, 0), + ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_neon, 0), + ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_neon, 0), + ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_neon, 0), + ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_neon, 0), + ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_neon, 0), + ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_neon, 0), + ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_neon, 0), + ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_neon, 0), + ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_neon, 0), + ObmcSubpelVarianceParams(3, 3, 
&aom_obmc_sub_pixel_variance8x8_neon, 0), + ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_neon, 0), + ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_neon, 0), + ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_neon, 0), + ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_neon, 0), + ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_neon, 0), + ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_neon, 0), + ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_neon, 0), + ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_neon, 0), + ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_neon, 0), +}; +INSTANTIATE_TEST_SUITE_P(NEON, AvxObmcSubpelVarianceTest, + ::testing::ValuesIn(kArrayObmcSubpelVariance_neon)); +#endif + +const GetSseSumParams kArrayGetSseSum8x8Quad_neon[] = { + GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon, 0), + GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon, 0), + GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon, 0), + GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon, 0) +}; +INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum8x8QuadTest, + ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon)); + +const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon[] = { + GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon, 0), + GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon, 0), + GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon, 0), + GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon, 0) +}; +INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum16x16DualTest, + ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon)); + +#if CONFIG_AV1_HIGHBITDEPTH +const VarianceParams kArrayHBDVariance_neon[] = { + VarianceParams(7, 7, &aom_highbd_12_variance128x128_neon, 12), + VarianceParams(7, 6, &aom_highbd_12_variance128x64_neon, 12), + VarianceParams(6, 7, 
&aom_highbd_12_variance64x128_neon, 12), + VarianceParams(6, 6, &aom_highbd_12_variance64x64_neon, 12), + VarianceParams(6, 5, &aom_highbd_12_variance64x32_neon, 12), + VarianceParams(5, 6, &aom_highbd_12_variance32x64_neon, 12), + VarianceParams(5, 5, &aom_highbd_12_variance32x32_neon, 12), + VarianceParams(5, 4, &aom_highbd_12_variance32x16_neon, 12), + VarianceParams(4, 5, &aom_highbd_12_variance16x32_neon, 12), + VarianceParams(4, 4, &aom_highbd_12_variance16x16_neon, 12), + VarianceParams(4, 3, &aom_highbd_12_variance16x8_neon, 12), + VarianceParams(3, 4, &aom_highbd_12_variance8x16_neon, 12), + VarianceParams(3, 3, &aom_highbd_12_variance8x8_neon, 12), + VarianceParams(3, 2, &aom_highbd_12_variance8x4_neon, 12), + VarianceParams(2, 3, &aom_highbd_12_variance4x8_neon, 12), + VarianceParams(2, 2, &aom_highbd_12_variance4x4_neon, 12), + VarianceParams(7, 7, &aom_highbd_10_variance128x128_neon, 10), + VarianceParams(7, 6, &aom_highbd_10_variance128x64_neon, 10), + VarianceParams(6, 7, &aom_highbd_10_variance64x128_neon, 10), + VarianceParams(6, 6, &aom_highbd_10_variance64x64_neon, 10), + VarianceParams(6, 5, &aom_highbd_10_variance64x32_neon, 10), + VarianceParams(5, 6, &aom_highbd_10_variance32x64_neon, 10), + VarianceParams(5, 5, &aom_highbd_10_variance32x32_neon, 10), + VarianceParams(5, 4, &aom_highbd_10_variance32x16_neon, 10), + VarianceParams(4, 5, &aom_highbd_10_variance16x32_neon, 10), + VarianceParams(4, 4, &aom_highbd_10_variance16x16_neon, 10), + VarianceParams(4, 3, &aom_highbd_10_variance16x8_neon, 10), + VarianceParams(3, 4, &aom_highbd_10_variance8x16_neon, 10), + VarianceParams(3, 3, &aom_highbd_10_variance8x8_neon, 10), + VarianceParams(3, 2, &aom_highbd_10_variance8x4_neon, 10), + VarianceParams(2, 3, &aom_highbd_10_variance4x8_neon, 10), + VarianceParams(2, 2, &aom_highbd_10_variance4x4_neon, 10), + VarianceParams(7, 7, &aom_highbd_8_variance128x128_neon, 8), + VarianceParams(7, 6, &aom_highbd_8_variance128x64_neon, 8), + VarianceParams(6, 7, 
&aom_highbd_8_variance64x128_neon, 8), + VarianceParams(6, 6, &aom_highbd_8_variance64x64_neon, 8), + VarianceParams(6, 5, &aom_highbd_8_variance64x32_neon, 8), + VarianceParams(5, 6, &aom_highbd_8_variance32x64_neon, 8), + VarianceParams(5, 5, &aom_highbd_8_variance32x32_neon, 8), + VarianceParams(5, 4, &aom_highbd_8_variance32x16_neon, 8), + VarianceParams(4, 5, &aom_highbd_8_variance16x32_neon, 8), + VarianceParams(4, 4, &aom_highbd_8_variance16x16_neon, 8), + VarianceParams(4, 3, &aom_highbd_8_variance16x8_neon, 8), + VarianceParams(3, 4, &aom_highbd_8_variance8x16_neon, 8), + VarianceParams(3, 3, &aom_highbd_8_variance8x8_neon, 8), + VarianceParams(3, 2, &aom_highbd_8_variance8x4_neon, 8), + VarianceParams(2, 3, &aom_highbd_8_variance4x8_neon, 8), + VarianceParams(2, 2, &aom_highbd_8_variance4x4_neon, 8), +#if !CONFIG_REALTIME_ONLY + VarianceParams(6, 4, &aom_highbd_12_variance64x16_neon, 12), + VarianceParams(4, 6, &aom_highbd_12_variance16x64_neon, 12), + VarianceParams(5, 3, &aom_highbd_12_variance32x8_neon, 12), + VarianceParams(3, 5, &aom_highbd_12_variance8x32_neon, 12), + VarianceParams(4, 2, &aom_highbd_12_variance16x4_neon, 12), + VarianceParams(2, 4, &aom_highbd_12_variance4x16_neon, 12), + VarianceParams(6, 4, &aom_highbd_10_variance64x16_neon, 10), + VarianceParams(4, 6, &aom_highbd_10_variance16x64_neon, 10), + VarianceParams(5, 3, &aom_highbd_10_variance32x8_neon, 10), + VarianceParams(3, 5, &aom_highbd_10_variance8x32_neon, 10), + VarianceParams(4, 2, &aom_highbd_10_variance16x4_neon, 10), + VarianceParams(2, 4, &aom_highbd_10_variance4x16_neon, 10), + VarianceParams(6, 4, &aom_highbd_8_variance64x16_neon, 8), + VarianceParams(4, 6, &aom_highbd_8_variance16x64_neon, 8), + VarianceParams(5, 3, &aom_highbd_8_variance32x8_neon, 8), + VarianceParams(3, 5, &aom_highbd_8_variance8x32_neon, 8), + VarianceParams(4, 2, &aom_highbd_8_variance16x4_neon, 8), + VarianceParams(2, 4, &aom_highbd_8_variance4x16_neon, 8), +#endif +}; + 
+INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDVarianceTest, + ::testing::ValuesIn(kArrayHBDVariance_neon)); + +const SubpelVarianceParams kArrayHBDSubpelVariance_neon[] = { + SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_neon, 12), + SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_neon, 12), + SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_neon, 12), + SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_neon, 12), + SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_neon, 12), + SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_neon, 12), + SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_neon, 12), + SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_neon, 12), + SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_neon, 12), + SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_neon, 12), + SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_neon, 12), + SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_neon, 12), + SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_neon, 12), + SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_neon, 10), + SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_neon, 10), + SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_neon, 10), + SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_neon, 10), + SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_neon, 10), + SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_neon, 10), + SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_neon, 10), + SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_neon, 10), + SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_neon, 10), + SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_neon, 10), + SubpelVarianceParams(3, 2, 
&aom_highbd_10_sub_pixel_variance8x4_neon, 10), + SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_neon, 10), + SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_neon, 10), + SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_neon, 8), + SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_neon, 8), + SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_neon, 8), + SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_neon, 8), + SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_neon, 8), + SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_neon, 8), + SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_neon, 8), + SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_neon, 8), + SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_neon, 8), + SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_neon, 8), + SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_neon, 8), + SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_neon, 8), + SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_neon, 8), +#if !CONFIG_REALTIME_ONLY + SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_neon, 8), + SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_neon, 8), + SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_neon, 8), + SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_neon, 8), + SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_neon, 8), + SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_neon, 8), + SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_neon, 10), + SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_neon, 10), + SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_neon, 10), + SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_neon, 10), + 
SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_neon, 10), + SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_neon, 10), + SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_neon, 12), + SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_neon, 12), + SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_neon, 12), + SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_neon, 12), + SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_neon, 12), + SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_neon, 12), +#endif // !CONFIG_REALTIME_ONLY +}; + +INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelVarianceTest, + ::testing::ValuesIn(kArrayHBDSubpelVariance_neon)); + +const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_neon[] = { + SubpelAvgVarianceParams(7, 7, + &aom_highbd_8_sub_pixel_avg_variance128x128_neon, 8), + SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_neon, + 8), + SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_neon, + 8), + SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_neon, + 8), + SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_neon, + 8), + SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_neon, + 8), + SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_neon, + 8), + SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_neon, + 8), + SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_neon, + 8), + SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_neon, + 8), + SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_neon, + 8), + SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_neon, + 8), + SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_neon, + 8), + SubpelAvgVarianceParams(3, 2, 
&aom_highbd_8_sub_pixel_avg_variance8x4_neon, + 8), + SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_neon, + 8), + SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_neon, + 8), + SubpelAvgVarianceParams( + 7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_neon, 10), + SubpelAvgVarianceParams(7, 6, + &aom_highbd_10_sub_pixel_avg_variance128x64_neon, 10), + SubpelAvgVarianceParams(6, 7, + &aom_highbd_10_sub_pixel_avg_variance64x128_neon, 10), + SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_neon, + 10), + SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_neon, + 10), + SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_neon, + 10), + SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_neon, + 10), + SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_neon, + 10), + SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_neon, + 10), + SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_neon, + 10), + SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_neon, + 10), + SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_neon, + 10), + SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_neon, + 10), + SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_neon, + 10), + SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_neon, + 10), + SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_neon, + 10), + SubpelAvgVarianceParams( + 7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_neon, 12), + SubpelAvgVarianceParams(7, 6, + &aom_highbd_12_sub_pixel_avg_variance128x64_neon, 12), + SubpelAvgVarianceParams(6, 7, + &aom_highbd_12_sub_pixel_avg_variance64x128_neon, 12), + SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_neon, + 12), + 
SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_neon, + 12), + SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_neon, + 12), + SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_neon, + 12), + SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_neon, + 12), + SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_neon, + 12), + SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_neon, + 12), + SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_neon, + 12), + SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_neon, + 12), + SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_neon, + 12), + SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_neon, + 12), + SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_neon, + 12), + SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_neon, + 12), + +#if !CONFIG_REALTIME_ONLY + SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_neon, + 8), + SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_neon, + 8), + SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_neon, + 8), + SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_neon, + 8), + SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_neon, + 8), + SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_neon, + 8), + SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_neon, + 10), + SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_neon, + 10), + SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_neon, + 10), + SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_neon, + 10), + SubpelAvgVarianceParams(4, 2, 
&aom_highbd_10_sub_pixel_avg_variance16x4_neon, + 10), + SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_neon, + 10), + SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_neon, + 12), + SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_neon, + 12), + SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_neon, + 12), + SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_neon, + 12), + SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_neon, + 12), + SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_neon, + 12), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelAvgVarianceTest, + ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_neon)); + +const DistWtdSubpelAvgVarianceParams + kArrayHBDDistWtdSubpelAvgVariance_neon[] = { + DistWtdSubpelAvgVarianceParams( + 7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_neon, 8), + DistWtdSubpelAvgVarianceParams( + 7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_neon, 8), + DistWtdSubpelAvgVarianceParams( + 6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_neon, 8), + DistWtdSubpelAvgVarianceParams( + 6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_neon, 8), + DistWtdSubpelAvgVarianceParams( + 6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_neon, 8), + DistWtdSubpelAvgVarianceParams( + 5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_neon, 8), + DistWtdSubpelAvgVarianceParams( + 5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_neon, 8), + DistWtdSubpelAvgVarianceParams( + 5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_neon, 8), + DistWtdSubpelAvgVarianceParams( + 4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_neon, 8), + DistWtdSubpelAvgVarianceParams( + 4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_neon, 8), + DistWtdSubpelAvgVarianceParams( + 4, 3, 
&aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_neon, 8), + DistWtdSubpelAvgVarianceParams( + 3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_neon, 8), + DistWtdSubpelAvgVarianceParams( + 3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_neon, 8), + DistWtdSubpelAvgVarianceParams( + 3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_neon, 8), + DistWtdSubpelAvgVarianceParams( + 2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_neon, 8), + DistWtdSubpelAvgVarianceParams( + 2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_neon, 8), + DistWtdSubpelAvgVarianceParams( + 7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_neon, 10), + DistWtdSubpelAvgVarianceParams( + 7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_neon, 10), + DistWtdSubpelAvgVarianceParams( + 6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_neon, 10), + DistWtdSubpelAvgVarianceParams( + 6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_neon, 10), + DistWtdSubpelAvgVarianceParams( + 6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_neon, 10), + DistWtdSubpelAvgVarianceParams( + 5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_neon, 10), + DistWtdSubpelAvgVarianceParams( + 5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_neon, 10), + DistWtdSubpelAvgVarianceParams( + 5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_neon, 10), + DistWtdSubpelAvgVarianceParams( + 4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_neon, 10), + DistWtdSubpelAvgVarianceParams( + 4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_neon, 10), + DistWtdSubpelAvgVarianceParams( + 4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_neon, 10), + DistWtdSubpelAvgVarianceParams( + 3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_neon, 10), + DistWtdSubpelAvgVarianceParams( + 3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_neon, 10), + DistWtdSubpelAvgVarianceParams( + 3, 2, 
&aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_neon, 10), + DistWtdSubpelAvgVarianceParams( + 2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_neon, 10), + DistWtdSubpelAvgVarianceParams( + 2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_neon, 10), + DistWtdSubpelAvgVarianceParams( + 7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_neon, 12), + DistWtdSubpelAvgVarianceParams( + 7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_neon, 12), + DistWtdSubpelAvgVarianceParams( + 6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_neon, 12), + DistWtdSubpelAvgVarianceParams( + 6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_neon, 12), + DistWtdSubpelAvgVarianceParams( + 6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_neon, 12), + DistWtdSubpelAvgVarianceParams( + 5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_neon, 12), + DistWtdSubpelAvgVarianceParams( + 5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_neon, 12), + DistWtdSubpelAvgVarianceParams( + 5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_neon, 12), + DistWtdSubpelAvgVarianceParams( + 4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_neon, 12), + DistWtdSubpelAvgVarianceParams( + 4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_neon, 12), + DistWtdSubpelAvgVarianceParams( + 4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_neon, 12), + DistWtdSubpelAvgVarianceParams( + 3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_neon, 12), + DistWtdSubpelAvgVarianceParams( + 3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_neon, 12), + DistWtdSubpelAvgVarianceParams( + 3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_neon, 12), + DistWtdSubpelAvgVarianceParams( + 2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_neon, 12), + DistWtdSubpelAvgVarianceParams( + 2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_neon, 12), + +#if !CONFIG_REALTIME_ONLY + 
DistWtdSubpelAvgVarianceParams( + 6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_neon, 8), + DistWtdSubpelAvgVarianceParams( + 4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_neon, 8), + DistWtdSubpelAvgVarianceParams( + 5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_neon, 8), + DistWtdSubpelAvgVarianceParams( + 3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_neon, 8), + DistWtdSubpelAvgVarianceParams( + 4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_neon, 8), + DistWtdSubpelAvgVarianceParams( + 2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_neon, 8), + DistWtdSubpelAvgVarianceParams( + 6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_neon, 10), + DistWtdSubpelAvgVarianceParams( + 4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_neon, 10), + DistWtdSubpelAvgVarianceParams( + 5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_neon, 10), + DistWtdSubpelAvgVarianceParams( + 3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_neon, 10), + DistWtdSubpelAvgVarianceParams( + 4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_neon, 10), + DistWtdSubpelAvgVarianceParams( + 2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_neon, 10), + DistWtdSubpelAvgVarianceParams( + 6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_neon, 12), + DistWtdSubpelAvgVarianceParams( + 4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_neon, 12), + DistWtdSubpelAvgVarianceParams( + 5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_neon, 12), + DistWtdSubpelAvgVarianceParams( + 3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_neon, 12), + DistWtdSubpelAvgVarianceParams( + 4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_neon, 12), + DistWtdSubpelAvgVarianceParams( + 2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_neon, 12), +#endif // !CONFIG_REALTIME_ONLY + }; +INSTANTIATE_TEST_SUITE_P( + NEON, AvxHBDDistWtdSubpelAvgVarianceTest, + 
::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_neon)); + +#if !CONFIG_REALTIME_ONLY +const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_neon[] = { + ObmcSubpelVarianceParams( + 7, 7, &aom_highbd_12_obmc_sub_pixel_variance128x128_neon, 12), + ObmcSubpelVarianceParams( + 7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_neon, 12), + ObmcSubpelVarianceParams( + 6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_neon, 12), + ObmcSubpelVarianceParams( + 6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_neon, 12), + ObmcSubpelVarianceParams( + 6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_neon, 12), + ObmcSubpelVarianceParams( + 5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_neon, 12), + ObmcSubpelVarianceParams( + 5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_neon, 12), + ObmcSubpelVarianceParams( + 5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_neon, 12), + ObmcSubpelVarianceParams( + 4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_neon, 12), + ObmcSubpelVarianceParams( + 4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_neon, 12), + ObmcSubpelVarianceParams(4, 3, + &aom_highbd_12_obmc_sub_pixel_variance16x8_neon, 12), + ObmcSubpelVarianceParams(3, 4, + &aom_highbd_12_obmc_sub_pixel_variance8x16_neon, 12), + ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_neon, + 12), + ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_neon, + 12), + ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_neon, + 12), + ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_neon, + 12), + ObmcSubpelVarianceParams( + 6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_neon, 12), + ObmcSubpelVarianceParams( + 4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_neon, 12), + ObmcSubpelVarianceParams(5, 3, + &aom_highbd_12_obmc_sub_pixel_variance32x8_neon, 12), + ObmcSubpelVarianceParams(3, 5, + &aom_highbd_12_obmc_sub_pixel_variance8x32_neon, 12), + ObmcSubpelVarianceParams(4, 2, 
+ &aom_highbd_12_obmc_sub_pixel_variance16x4_neon, 12), + ObmcSubpelVarianceParams(2, 4, + &aom_highbd_12_obmc_sub_pixel_variance4x16_neon, 12), + ObmcSubpelVarianceParams( + 7, 7, &aom_highbd_10_obmc_sub_pixel_variance128x128_neon, 10), + ObmcSubpelVarianceParams( + 7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_neon, 10), + ObmcSubpelVarianceParams( + 6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_neon, 10), + ObmcSubpelVarianceParams( + 6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_neon, 10), + ObmcSubpelVarianceParams( + 6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_neon, 10), + ObmcSubpelVarianceParams( + 5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_neon, 10), + ObmcSubpelVarianceParams( + 5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_neon, 10), + ObmcSubpelVarianceParams( + 5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_neon, 10), + ObmcSubpelVarianceParams( + 4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_neon, 10), + ObmcSubpelVarianceParams( + 4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_neon, 10), + ObmcSubpelVarianceParams(4, 3, + &aom_highbd_10_obmc_sub_pixel_variance16x8_neon, 10), + ObmcSubpelVarianceParams(3, 4, + &aom_highbd_10_obmc_sub_pixel_variance8x16_neon, 10), + ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_neon, + 10), + ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_neon, + 10), + ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_neon, + 10), + ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_neon, + 10), + ObmcSubpelVarianceParams( + 6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_neon, 10), + ObmcSubpelVarianceParams( + 4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_neon, 10), + ObmcSubpelVarianceParams(5, 3, + &aom_highbd_10_obmc_sub_pixel_variance32x8_neon, 10), + ObmcSubpelVarianceParams(3, 5, + &aom_highbd_10_obmc_sub_pixel_variance8x32_neon, 10), + ObmcSubpelVarianceParams(4, 2, + 
&aom_highbd_10_obmc_sub_pixel_variance16x4_neon, 10), + ObmcSubpelVarianceParams(2, 4, + &aom_highbd_10_obmc_sub_pixel_variance4x16_neon, 10), + ObmcSubpelVarianceParams( + 7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_neon, 8), + ObmcSubpelVarianceParams(7, 6, + &aom_highbd_8_obmc_sub_pixel_variance128x64_neon, 8), + ObmcSubpelVarianceParams(6, 7, + &aom_highbd_8_obmc_sub_pixel_variance64x128_neon, 8), + ObmcSubpelVarianceParams(6, 6, + &aom_highbd_8_obmc_sub_pixel_variance64x64_neon, 8), + ObmcSubpelVarianceParams(6, 5, + &aom_highbd_8_obmc_sub_pixel_variance64x32_neon, 8), + ObmcSubpelVarianceParams(5, 6, + &aom_highbd_8_obmc_sub_pixel_variance32x64_neon, 8), + ObmcSubpelVarianceParams(5, 5, + &aom_highbd_8_obmc_sub_pixel_variance32x32_neon, 8), + ObmcSubpelVarianceParams(5, 4, + &aom_highbd_8_obmc_sub_pixel_variance32x16_neon, 8), + ObmcSubpelVarianceParams(4, 5, + &aom_highbd_8_obmc_sub_pixel_variance16x32_neon, 8), + ObmcSubpelVarianceParams(4, 4, + &aom_highbd_8_obmc_sub_pixel_variance16x16_neon, 8), + ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_neon, + 8), + ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_neon, + 8), + ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_neon, + 8), + ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_neon, + 8), + ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_neon, + 8), + ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_neon, + 8), + ObmcSubpelVarianceParams(6, 4, + &aom_highbd_8_obmc_sub_pixel_variance64x16_neon, 8), + ObmcSubpelVarianceParams(4, 6, + &aom_highbd_8_obmc_sub_pixel_variance16x64_neon, 8), + ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_neon, + 8), + ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_neon, + 8), + ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_neon, + 8), + 
ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_neon, + 8), +}; + +INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDObmcSubpelVarianceTest, + ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_neon)); +#endif // !CONFIG_REALTIME_ONLY + +#endif // CONFIG_AV1_HIGHBITDEPTH + +#endif // HAVE_NEON + +#if HAVE_NEON_DOTPROD + +const VarianceParams kArrayVariance_neon_dotprod[] = { + VarianceParams(7, 7, &aom_variance128x128_neon_dotprod), + VarianceParams(6, 6, &aom_variance64x64_neon_dotprod), + VarianceParams(7, 6, &aom_variance128x64_neon_dotprod), + VarianceParams(6, 7, &aom_variance64x128_neon_dotprod), + VarianceParams(6, 6, &aom_variance64x64_neon_dotprod), + VarianceParams(6, 5, &aom_variance64x32_neon_dotprod), + VarianceParams(5, 6, &aom_variance32x64_neon_dotprod), + VarianceParams(5, 5, &aom_variance32x32_neon_dotprod), + VarianceParams(5, 4, &aom_variance32x16_neon_dotprod), + VarianceParams(4, 5, &aom_variance16x32_neon_dotprod), + VarianceParams(4, 4, &aom_variance16x16_neon_dotprod), + VarianceParams(4, 3, &aom_variance16x8_neon_dotprod), + VarianceParams(3, 4, &aom_variance8x16_neon_dotprod), + VarianceParams(3, 3, &aom_variance8x8_neon_dotprod), + VarianceParams(3, 2, &aom_variance8x4_neon_dotprod), + VarianceParams(2, 3, &aom_variance4x8_neon_dotprod), + VarianceParams(2, 2, &aom_variance4x4_neon_dotprod), +#if !CONFIG_REALTIME_ONLY + VarianceParams(2, 4, &aom_variance4x16_neon_dotprod), + VarianceParams(4, 2, &aom_variance16x4_neon_dotprod), + VarianceParams(3, 5, &aom_variance8x32_neon_dotprod), + VarianceParams(5, 3, &aom_variance32x8_neon_dotprod), + VarianceParams(4, 6, &aom_variance16x64_neon_dotprod), + VarianceParams(6, 4, &aom_variance64x16_neon_dotprod), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AvxVarianceTest, + ::testing::ValuesIn(kArrayVariance_neon_dotprod)); + +const GetSseSumParams kArrayGetSseSum8x8Quad_neon_dotprod[] = { + GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0), + GetSseSumParams(6, 
6, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0), + GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0), + GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0) +}; +INSTANTIATE_TEST_SUITE_P( + NEON_DOTPROD, GetSseSum8x8QuadTest, + ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon_dotprod)); + +const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon_dotprod[] = { + GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0), + GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0), + GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0), + GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0) +}; +INSTANTIATE_TEST_SUITE_P( + NEON_DOTPROD, GetSseSum16x16DualTest, + ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon_dotprod)); + +INSTANTIATE_TEST_SUITE_P( + NEON_DOTPROD, AvxMseTest, + ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon_dotprod), + MseParams(3, 4, &aom_mse8x16_neon_dotprod), + MseParams(4, 4, &aom_mse16x16_neon_dotprod), + MseParams(4, 3, &aom_mse16x8_neon_dotprod))); + +#endif // HAVE_NEON_DOTPROD + +#if HAVE_SVE + +#if CONFIG_AV1_HIGHBITDEPTH +const VarianceParams kArrayHBDVariance_sve[] = { + VarianceParams(7, 7, &aom_highbd_12_variance128x128_sve, 12), + VarianceParams(7, 6, &aom_highbd_12_variance128x64_sve, 12), + VarianceParams(6, 7, &aom_highbd_12_variance64x128_sve, 12), + VarianceParams(6, 6, &aom_highbd_12_variance64x64_sve, 12), + VarianceParams(6, 5, &aom_highbd_12_variance64x32_sve, 12), + VarianceParams(5, 6, &aom_highbd_12_variance32x64_sve, 12), + VarianceParams(5, 5, &aom_highbd_12_variance32x32_sve, 12), + VarianceParams(5, 4, &aom_highbd_12_variance32x16_sve, 12), + VarianceParams(4, 5, &aom_highbd_12_variance16x32_sve, 12), + VarianceParams(4, 4, &aom_highbd_12_variance16x16_sve, 12), + VarianceParams(4, 3, &aom_highbd_12_variance16x8_sve, 12), + VarianceParams(3, 4, &aom_highbd_12_variance8x16_sve, 12), + VarianceParams(3, 3, 
&aom_highbd_12_variance8x8_sve, 12), + VarianceParams(3, 2, &aom_highbd_12_variance8x4_sve, 12), + VarianceParams(2, 3, &aom_highbd_12_variance4x8_sve, 12), + VarianceParams(2, 2, &aom_highbd_12_variance4x4_sve, 12), + VarianceParams(7, 7, &aom_highbd_10_variance128x128_sve, 10), + VarianceParams(7, 6, &aom_highbd_10_variance128x64_sve, 10), + VarianceParams(6, 7, &aom_highbd_10_variance64x128_sve, 10), + VarianceParams(6, 6, &aom_highbd_10_variance64x64_sve, 10), + VarianceParams(6, 5, &aom_highbd_10_variance64x32_sve, 10), + VarianceParams(5, 6, &aom_highbd_10_variance32x64_sve, 10), + VarianceParams(5, 5, &aom_highbd_10_variance32x32_sve, 10), + VarianceParams(5, 4, &aom_highbd_10_variance32x16_sve, 10), + VarianceParams(4, 5, &aom_highbd_10_variance16x32_sve, 10), + VarianceParams(4, 4, &aom_highbd_10_variance16x16_sve, 10), + VarianceParams(4, 3, &aom_highbd_10_variance16x8_sve, 10), + VarianceParams(3, 4, &aom_highbd_10_variance8x16_sve, 10), + VarianceParams(3, 3, &aom_highbd_10_variance8x8_sve, 10), + VarianceParams(3, 2, &aom_highbd_10_variance8x4_sve, 10), + VarianceParams(2, 3, &aom_highbd_10_variance4x8_sve, 10), + VarianceParams(2, 2, &aom_highbd_10_variance4x4_sve, 10), + VarianceParams(7, 7, &aom_highbd_8_variance128x128_sve, 8), + VarianceParams(7, 6, &aom_highbd_8_variance128x64_sve, 8), + VarianceParams(6, 7, &aom_highbd_8_variance64x128_sve, 8), + VarianceParams(6, 6, &aom_highbd_8_variance64x64_sve, 8), + VarianceParams(6, 5, &aom_highbd_8_variance64x32_sve, 8), + VarianceParams(5, 6, &aom_highbd_8_variance32x64_sve, 8), + VarianceParams(5, 5, &aom_highbd_8_variance32x32_sve, 8), + VarianceParams(5, 4, &aom_highbd_8_variance32x16_sve, 8), + VarianceParams(4, 5, &aom_highbd_8_variance16x32_sve, 8), + VarianceParams(4, 4, &aom_highbd_8_variance16x16_sve, 8), + VarianceParams(4, 3, &aom_highbd_8_variance16x8_sve, 8), + VarianceParams(3, 4, &aom_highbd_8_variance8x16_sve, 8), + VarianceParams(3, 3, &aom_highbd_8_variance8x8_sve, 8), + 
VarianceParams(3, 2, &aom_highbd_8_variance8x4_sve, 8), + VarianceParams(2, 3, &aom_highbd_8_variance4x8_sve, 8), + VarianceParams(2, 2, &aom_highbd_8_variance4x4_sve, 8), +#if !CONFIG_REALTIME_ONLY + VarianceParams(6, 4, &aom_highbd_12_variance64x16_sve, 12), + VarianceParams(4, 6, &aom_highbd_12_variance16x64_sve, 12), + VarianceParams(5, 3, &aom_highbd_12_variance32x8_sve, 12), + VarianceParams(3, 5, &aom_highbd_12_variance8x32_sve, 12), + VarianceParams(4, 2, &aom_highbd_12_variance16x4_sve, 12), + VarianceParams(2, 4, &aom_highbd_12_variance4x16_sve, 12), + VarianceParams(6, 4, &aom_highbd_10_variance64x16_sve, 10), + VarianceParams(4, 6, &aom_highbd_10_variance16x64_sve, 10), + VarianceParams(5, 3, &aom_highbd_10_variance32x8_sve, 10), + VarianceParams(3, 5, &aom_highbd_10_variance8x32_sve, 10), + VarianceParams(4, 2, &aom_highbd_10_variance16x4_sve, 10), + VarianceParams(2, 4, &aom_highbd_10_variance4x16_sve, 10), + VarianceParams(6, 4, &aom_highbd_8_variance64x16_sve, 8), + VarianceParams(4, 6, &aom_highbd_8_variance16x64_sve, 8), + VarianceParams(5, 3, &aom_highbd_8_variance32x8_sve, 8), + VarianceParams(3, 5, &aom_highbd_8_variance8x32_sve, 8), + VarianceParams(4, 2, &aom_highbd_8_variance16x4_sve, 8), + VarianceParams(2, 4, &aom_highbd_8_variance4x16_sve, 8), +#endif +}; + +INSTANTIATE_TEST_SUITE_P(SVE, AvxHBDVarianceTest, + ::testing::ValuesIn(kArrayHBDVariance_sve)); + +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_SVE + +} // namespace diff --git a/third_party/aom/test/video_source.h b/third_party/aom/test/video_source.h new file mode 100644 index 0000000000..9d73d7b253 --- /dev/null +++ b/third_party/aom/test/video_source.h @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
// Two-step stringification so that LIBAOM_TEST_DATA_PATH is macro-expanded
// first and only then turned into a string literal. Both helpers are removed
// again right after GetDataPath().
// NOTE: LIBAOM_TEST_DATA_PATH must be supplied WITHOUT surrounding quotes;
// an already-quoted value would end up with the quotes embedded in the path.
#define TO_STRING(S) #S
#define STRINGIFY(S) TO_STRING(S)

// Returns the directory that holds the test data files. Lookup order:
//   1. the LIBAOM_TEST_DATA_PATH environment variable, if set;
//   2. the LIBAOM_TEST_DATA_PATH preprocessor symbol, if defined at build
//      time (for environments where environment variables cannot be set);
//   3. the current directory, ".".
static std::string GetDataPath() {
  if (const char *const env_value = getenv("LIBAOM_TEST_DATA_PATH")) {
    return env_value;
  }
#ifdef LIBAOM_TEST_DATA_PATH
  return STRINGIFY(LIBAOM_TEST_DATA_PATH);
#else
  return ".";
#endif
}

// The stringification helpers are not needed anywhere else.
#undef TO_STRING
#undef STRINGIFY

// Opens "<data path>/<file_name>" for binary reading. Returns whatever
// fopen() returns, i.e. nullptr when the file cannot be opened.
inline FILE *OpenTestDataFile(const std::string &file_name) {
  std::string full_path = GetDataPath();
  full_path += "/";
  full_path += file_name;
  return fopen(full_path.c_str(), "rb");
}
*file_name) { + file_name->clear(); +#if defined(_WIN32) + char fname[MAX_PATH]; + char tmppath[MAX_PATH]; + if (GetTempPathA(MAX_PATH, tmppath)) { + // Assume for now that the filename generated is unique per process + if (GetTempFileNameA(tmppath, "lvx", 0, fname)) { + file_name->assign(fname); + return fopen(fname, "wb+"); + } + } + return nullptr; +#else + std::string temp_dir = testing::TempDir(); + if (temp_dir.empty()) return nullptr; + // Versions of testing::TempDir() prior to release-1.11.0-214-g5e6a5336 may + // use the value of an environment variable without checking for a trailing + // path delimiter. + if (temp_dir[temp_dir.size() - 1] != '/') temp_dir += '/'; + const char name_template[] = "libaomtest.XXXXXX"; + std::unique_ptr<char[]> temp_file_name( + new char[temp_dir.size() + sizeof(name_template)]); + if (temp_file_name == nullptr) return nullptr; + memcpy(temp_file_name.get(), temp_dir.data(), temp_dir.size()); + memcpy(temp_file_name.get() + temp_dir.size(), name_template, + sizeof(name_template)); + const int fd = mkstemp(temp_file_name.get()); + if (fd == -1) return nullptr; + *file_name = temp_file_name.get(); + return fdopen(fd, "wb+"); +#endif +} + +class TempOutFile { + public: + TempOutFile() { file_ = GetTempOutFile(&file_name_); } + ~TempOutFile() { + CloseFile(); + if (!file_name_.empty()) { + EXPECT_EQ(0, remove(file_name_.c_str())); + } + } + FILE *file() { return file_; } + const std::string &file_name() { return file_name_; } + + protected: + void CloseFile() { + if (file_) { + fclose(file_); + file_ = nullptr; + } + } + FILE *file_; + std::string file_name_; +}; + +// Abstract base class for test video sources, which provide a stream of +// aom_image_t images with associated timestamps and duration. +class VideoSource { + public: + virtual ~VideoSource() = default; + + // Prepare the stream for reading, rewind/open as necessary. 
+ virtual void Begin() = 0; + + // Advance the cursor to the next frame + virtual void Next() = 0; + + // Get the current video frame, or nullptr on End-Of-Stream. + virtual aom_image_t *img() const = 0; + + // Get the presentation timestamp of the current frame. + virtual aom_codec_pts_t pts() const = 0; + + // Get the current frame's duration + virtual unsigned long duration() const = 0; + + // Get the timebase for the stream + virtual aom_rational_t timebase() const = 0; + + // Get the current frame counter, starting at 0. + virtual unsigned int frame() const = 0; + + // Get the current file limit. + virtual unsigned int limit() const = 0; +}; + +class DummyVideoSource : public VideoSource { + public: + DummyVideoSource() + : img_(nullptr), limit_(100), width_(80), height_(64), + format_(AOM_IMG_FMT_I420) { + ReallocImage(); + } + + ~DummyVideoSource() override { aom_img_free(img_); } + + void Begin() override { + frame_ = 0; + FillFrame(); + } + + void Next() override { + ++frame_; + FillFrame(); + } + + aom_image_t *img() const override { + return (frame_ < limit_) ? img_ : nullptr; + } + + // Models a stream where Timebase = 1/FPS, so pts == frame. 
+ aom_codec_pts_t pts() const override { return frame_; } + + unsigned long duration() const override { return 1; } + + aom_rational_t timebase() const override { + const aom_rational_t t = { 1, 30 }; + return t; + } + + unsigned int frame() const override { return frame_; } + + unsigned int limit() const override { return limit_; } + + void set_limit(unsigned int limit) { limit_ = limit; } + + void SetSize(unsigned int width, unsigned int height) { + if (width != width_ || height != height_) { + width_ = width; + height_ = height; + ReallocImage(); + } + } + + void SetImageFormat(aom_img_fmt_t format) { + if (format_ != format) { + format_ = format; + ReallocImage(); + } + } + + protected: + virtual void FillFrame() { + if (img_) memset(img_->img_data, 0, raw_sz_); + } + + void ReallocImage() { + aom_img_free(img_); + img_ = aom_img_alloc(nullptr, format_, width_, height_, 32); + ASSERT_NE(img_, nullptr); + raw_sz_ = ((img_->w + 31) & ~31u) * img_->h * img_->bps / 8; + } + + aom_image_t *img_; + size_t raw_sz_; + unsigned int limit_; + unsigned int frame_; + unsigned int width_; + unsigned int height_; + aom_img_fmt_t format_; +}; + +class RandomVideoSource : public DummyVideoSource { + public: + RandomVideoSource(int seed = ACMRandom::DeterministicSeed()) + : rnd_(seed), seed_(seed) {} + + // Reset the RNG to get a matching stream for the second pass + void Begin() override { + frame_ = 0; + rnd_.Reset(seed_); + FillFrame(); + } + + protected: + // 15 frames of noise, followed by 15 static frames. Reset to 0 rather + // than holding previous frames to encourage keyframes to be thrown. + void FillFrame() override { + if (img_) { + if (frame_ % 30 < 15) + for (size_t i = 0; i < raw_sz_; ++i) img_->img_data[i] = rnd_.Rand8(); + else + memset(img_->img_data, 0, raw_sz_); + } + } + + ACMRandom rnd_; + int seed_; +}; + +// Abstract base class for test video sources, which provide a stream of +// decompressed images to the decoder. 
// Pure interface: every member except the destructor is pure virtual, so a
// concrete source must implement all of them.
// NOTE(review): judging by cxdata()/frame_size(), implementations hand out
// compressed frame data (the decoder's input), not decoded images — confirm
// against the implementing classes.
class CompressedVideoSource {
 public:
  virtual ~CompressedVideoSource() = default;

  // One-time setup hook.
  // NOTE(review): presumably called once before Begin() — confirm with
  // callers.
  virtual void Init() = 0;

  // Prepare the stream for reading, rewind/open as necessary.
  virtual void Begin() = 0;

  // Advance the cursor to the next frame
  virtual void Next() = 0;

  // Read-only pointer to byte data.
  // NOTE(review): presumably the current frame's compressed payload, with
  // end-of-stream signalled by nullptr — confirm.
  virtual const uint8_t *cxdata() const = 0;

  // NOTE(review): presumably the size in bytes of the buffer returned by
  // cxdata() — confirm.
  virtual size_t frame_size() const = 0;

  // NOTE(review): presumably the index of the current frame — confirm
  // whether it is 0- or 1-based.
  virtual unsigned int frame_number() const = 0;
};
def bdsnr2(metric_set1, metric_set2):
  """
  Bjontegaard delta-SNR style comparison of two rate-distortion curves.

  Both curves are interpolated over log(bitrate) with a Piecewise Cubic
  Hermite Interpolating Polynomial (instead of a fitted polynomial, which
  can go awry), sampled at 100 evenly spaced points inside the bitrate range
  the two curves share, and integrated with the trapezoid rule.

  metric_set1 - list of tuples ( bitrate, metric ) for first graph
  metric_set2 - list of tuples ( bitrate, metric ) for second graph

  Returns the average metric difference (second curve minus first curve)
  over the shared range. Returns 0.0 when either set is empty, when the
  bitrate ranges do not overlap, or when the input cannot be processed.
  The input lists are not modified.
  """

  if not metric_set1 or not metric_set2:
    return 0.0

  try:

    # pchip_interpolate requires increasing x values. The x-axis here is
    # log(bitrate), so order the points by bitrate (the first tuple field).
    # sorted() builds new lists, leaving the caller's lists untouched.
    points1 = sorted(metric_set1)
    points2 = sorted(metric_set2)

    # Pull the log of the rate and clamped psnr from the point lists. An
    # infinite metric is clamped to 100.0 so it can be interpolated.
    log_rate1 = [math.log(x[0]) for x in points1]
    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in points1]
    log_rate2 = [math.log(x[0]) for x in points2]
    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in points2]

    # Integration interval. This metric only works on the area that's
    # overlapping. Extrapolation of these things is sketchy so we avoid.
    min_int = max([min(log_rate1), min(log_rate2)])
    max_int = min([max(log_rate1), max(log_rate2)])

    # No overlap means no sensible metric possible.
    if max_int <= min_int:
      return 0.0

    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
    # create 100 new sample points separated by interval.
    samples, interval = np.linspace(min_int, max_int, num=100, retstep=True)
    v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples)
    v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples)

    # Calculate the integral using the trapezoid method on the samples.
    # NumPy 2.0 renamed np.trapz to np.trapezoid; use whichever exists.
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz
    int_v1 = trapezoid(v1, dx=interval)
    int_v2 = trapezoid(v2, dx=interval)

    # Calculate the average improvement.
    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)

  except (TypeError, ZeroDivisionError, ValueError):
    # Malformed or degenerate input (for example a zero bitrate, for which
    # math.log raises ValueError) yields "no result" rather than an error.
    return 0.0

  return avg_exp_diff
def FillForm(string_for_substitution, dictionary_of_vars):
  """
  Substitutes every placeholder of the form //%%NAME%%// in
  string_for_substitution with dictionary_of_vars[NAME] and returns the
  resulting string.

  Raises KeyError if a placeholder names a key that is missing from
  dictionary_of_vars.
  """
  return_string = string_for_substitution
  # Non-greedy match: with a greedy ".*", two placeholders on the same line
  # were captured as one bogus name spanning both, which then failed the
  # dictionary lookup.
  for name in re.findall(r"//%%(.*?)%%//", string_for_substitution):
    # Plain text replacement. re.sub would treat the name as a regular
    # expression and the value as a replacement template, so backslash
    # sequences in the substituted data (e.g. "\n" inside JSON) would be
    # rewritten instead of copied verbatim.
    return_string = return_string.replace("//%%" + name + "%%//",
                                          dictionary_of_vars[name])
  return return_string
def ParseMetricFile(file_name, metric_column):
  """
  Reads a metrics file and returns its data points.

  file_name - path of the metrics file to read
  metric_column - index of the whitespace separated column to extract

  Returns a sorted list of unique ( bitrate, metric ) tuples, one for each
  line accepted by HasMetrics. The bitrate is the first column of the line.
  The metric is 0 when the requested column is missing or is not a number.
  """
  metric_set1 = set()
  # The with-statement closes the file even if a line fails to parse.
  with open(file_name, "r") as metric_file:
    for line in metric_file:
      if not HasMetrics(line):
        continue
      metrics = line.split()
      # A malformed bitrate is an error in the file and is left to propagate.
      bitrate = float(metrics[0])
      try:
        metric = float(metrics[metric_column])
      except (IndexError, ValueError):
        # Column absent or not numeric: record the point with a zero metric.
        metric = 0
      metric_set1.add((bitrate, metric))
  return sorted(metric_set1)
def HandleFiles(variables):
  """
  This script creates html for displaying metric data produced from data
  in a video stats file, as created by the AOM project when enable_psnr
  is turned on:

  Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..]

  The script parses each metrics file [see below] that matches the
  statfile_pattern in the baseline directory and looks for the file that
  matches that same file in each of the sub_dirs, and compares the resultant
  metrics bitrate, avg psnr, glb psnr, and ssim. "

  It provides a table in which each row is a file in the line directory,
  and a column for each subdir, with the cells representing how that clip
  compares to baseline for that subdir. A graph is given for each which
  compares filesize to that metric. If you click on a point in the graph it
  zooms in on that point.

  a SAMPLE metrics file:

  Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
  25.911 38.242 38.104 38.258 38.121 75.790 14103
  Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
  49.982 41.264 41.129 41.255 41.122 83.993 19817
  Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
  74.967 42.911 42.767 42.899 42.756 87.928 17332
  Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
  100.012 43.983 43.838 43.881 43.738 89.695 25389
  Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
  149.980 45.338 45.203 45.184 45.043 91.591 25438
  Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
  199.852 46.225 46.123 46.113 45.999 92.679 28302
  Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
  249.922 46.864 46.773 46.777 46.673 93.334 27244
  Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
  299.998 47.366 47.281 47.317 47.220 93.844 27137
  Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
  349.769 47.746 47.677 47.722 47.648 94.178 32226
  Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
  399.773 48.032 47.971 48.013 47.946 94.362 36203

  sample use:
  visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html
  """
  # The docstring above is printed as the usage text, so implementation notes
  # live in comments instead:
  #   variables[1]  - path of the html template
  #   variables[2]  - file name pattern of the stats files
  #   variables[3]  - baseline directory
  #   variables[4:] - directories compared against the baseline
  # The local variables of this function are handed to FillForm through
  # vars(), so their names are part of the contract with the template and
  # must not be renamed.

  # The template file is the html file into which we will write the
  # data from the stats file, formatted correctly for the gviz_api.
  with open(variables[1], "r") as template_file:
    page_template = template_file.read()

  # This is the path match pattern for finding stats files amongst
  # all the other files it could be. eg: *.stt
  file_pattern = variables[2]

  # This is the directory with files that we will use to do the comparison
  # against.
  baseline_dir = variables[3]
  snrs = ''
  filestable = {}

  filestable['dsnr'] = ''
  filestable['drate'] = ''
  filestable['avg'] = ''

  # Dirs is directories after the baseline to compare to the base.
  dirs = variables[4:len(variables)]

  # Find the metric files in the baseline directory.
  dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern))

  metrics = GetMetrics(baseline_dir + "/" + dir_list[0])

  metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];'

  for column in range(1, len(metrics)):

    for metric in ['avg','dsnr','drate']:
      description = {"file": ("string", "File")}

      # Go through each directory and add a column header to our description.
      countoverall = {}
      sumoverall = {}

      for directory in dirs:
        description[directory] = ("number", directory)
        countoverall[directory] = 0
        sumoverall[directory] = 0

      # Data holds the data for the visualization, name given comes from
      # gviz_api sample code.
      data = []
      for filename in dir_list:
        row = {'file': splitext(basename(filename))[0] }
        baseline_file_name = baseline_dir + "/" + filename

        # Read the metric file from each of the directories in our list.
        for directory in dirs:
          metric_file_name = directory + "/" + filename

          # If there is a metric file in the current directory, open it
          # and calculate its overall difference between it and the baseline
          # directory's metric file.
          if os.path.isfile(metric_file_name):
            overall = FileBetter(baseline_file_name, metric_file_name,
                                 column, metric)
            row[directory] = overall

            sumoverall[directory] += overall
            countoverall[directory] += 1

        data.append(row)

      # Add the overall numbers.
      row = {"file": "OVERALL" }
      for directory in dirs:
        # A directory without any matching stats file has nothing to average.
        # Leave its cell empty, as the per-file rows do, instead of dividing
        # by zero.
        if countoverall[directory]:
          row[directory] = sumoverall[directory] / countoverall[directory]
      data.append(row)

      # write the tables out
      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)

      filestable[metric] = ( filestable[metric] + "filestable_" + metric +
                             "[" + str(column) + "]=" +
                             data_table.ToJSon(columns_order=["file"]+dirs) + "\n" )

    filestable_avg = filestable['avg']
    filestable_dpsnr = filestable['dsnr']
    filestable_drate = filestable['drate']

    # Now we collect all the data for all the graphs. First the column
    # headers which will be Datarate and then each directory.
    columns = ("datarate",baseline_dir)
    description = {"datarate":("number", "Datarate")}
    for directory in dirs:
      description[directory] = ("number", directory)

    description[baseline_dir] = ("number", baseline_dir)

    snrs = snrs + "snrs[" + str(column) + "] = ["

    # Now collect the data for the graphs, file by file.
    for filename in dir_list:

      data = []

      # Collect the file in each directory and store all of its metrics
      # in the associated gviz metrics table.
      all_dirs = dirs + [baseline_dir]
      for directory in all_dirs:

        metric_file_name = directory + "/" + filename
        if not os.path.isfile(metric_file_name):
          continue

        # Read and parse the metrics file storing it to the data we'll
        # use for the gviz_api.Datatable.
        metrics = ParseMetricFile(metric_file_name, column)
        for bitrate, metric in metrics:
          data.append({"datarate": bitrate, directory: metric})

      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)
      snrs = snrs + "'" + data_table.ToJSon(
          columns_order=tuple(["datarate",baseline_dir]+dirs)) + "',"

    snrs = snrs + "]\n"

  formatters = ""
  for i in range(len(dirs)):
    formatters = "%s formatter.format(better, %d);" % (formatters, i+1)

  # Single-argument print() behaves the same under Python 2 and Python 3.
  print(FillForm(page_template, vars()))
  return
+ */ +#include <tuple> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/warp_filter_test_util.h" +using libaom_test::ACMRandom; +#if CONFIG_AV1_HIGHBITDEPTH +using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest; +#endif +using libaom_test::AV1WarpFilter::AV1WarpFilterTest; +using std::make_tuple; +using std::tuple; + +namespace { + +TEST_P(AV1WarpFilterTest, CheckOutput) { + RunCheckOutput(std::get<3>(GET_PARAM(0))); +} +TEST_P(AV1WarpFilterTest, DISABLED_Speed) { + RunSpeedTest(std::get<3>(GET_PARAM(0))); +} + +INSTANTIATE_TEST_SUITE_P( + C, AV1WarpFilterTest, + libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_c)); + +#if CONFIG_AV1_HIGHBITDEPTH && (HAVE_SSE4_1 || HAVE_NEON) +TEST_P(AV1HighbdWarpFilterTest, CheckOutput) { + RunCheckOutput(std::get<4>(GET_PARAM(0))); +} +TEST_P(AV1HighbdWarpFilterTest, DISABLED_Speed) { + RunSpeedTest(std::get<4>(GET_PARAM(0))); +} +#endif // CONFIG_AV1_HIGHBITDEPTH && (HAVE_SSE4_1 || HAVE_NEON) + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P( + SSE4_1, AV1WarpFilterTest, + libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_sse4_1)); + +#if CONFIG_AV1_HIGHBITDEPTH +INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdWarpFilterTest, + libaom_test::AV1HighbdWarpFilter::BuildParams( + av1_highbd_warp_affine_sse4_1)); +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1WarpFilterTest, + libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_avx2)); + +#if CONFIG_AV1_HIGHBITDEPTH +INSTANTIATE_TEST_SUITE_P( + AVX2, AV1HighbdWarpFilterTest, + libaom_test::AV1HighbdWarpFilter::BuildParams(av1_highbd_warp_affine_avx2)); +#endif // CONFIG_AV1_HIGHBITDEPTH +#endif // HAVE_AVX2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P( + NEON, AV1WarpFilterTest, + libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_neon)); + +#if CONFIG_AV1_HIGHBITDEPTH +INSTANTIATE_TEST_SUITE_P( + NEON, AV1HighbdWarpFilterTest, + 
// Draws one random warp-model parameter from rnd.
//   rnd           - random number source; consumes Rand8()/Rand16() values.
//   bits          - the magnitude is masked to this many low bits before 1 is
//                   added.
//   rnd_gen_zeros - when nonzero, 0 is occasionally returned.
// Returns 0 (only when rnd_gen_zeros is set) or a nonzero value with a random
// sign.
int32_t random_warped_param(libaom_test::ACMRandom *rnd, int bits,
                            int rnd_gen_zeros) {
  // Avoid accidentally generating a zero in speed tests, they are set by the
  // is_*_zero parameters instead.
  if (rnd_gen_zeros) {
    // 1 in 8 chance of generating zero (arbitrarily chosen)
    if (((rnd->Rand8()) & 7) == 0) return 0;
  }
  // Otherwise, generate uniform values in the range
  // [-(1 << bits), -1] U [1, 1 << bits]
  // NOTE(review): the magnitude is also limited by the range of Rand16();
  // presumably that is 16 bits, which would cap it when bits > 16 - confirm.
  int32_t v = 1 + (rnd->Rand16() & ((1 << bits) - 1));
  if ((rnd->Rand8()) & 1) return -v;
  return v;
}
// Times test_impl on a random 128x128 8-bit image for the output block size
// and is_*_zero flags of the current test parameter, and prints the average
// time per call.
void AV1WarpFilterTest::RunSpeedTest(warp_affine_func test_impl) {
  const int w = 128, h = 128;
  const int border = 16;
  const int stride = w + 2 * border;
  WarpTestParam params = GET_PARAM(0);
  const int out_w = std::get<0>(params), out_h = std::get<1>(params);
  const int is_alpha_zero = GET_PARAM(1);
  const int is_beta_zero = GET_PARAM(2);
  const int is_gamma_zero = GET_PARAM(3);
  const int is_delta_zero = GET_PARAM(4);
  int sub_x, sub_y;
  const int bd = 8;

  std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * stride]);
  ASSERT_NE(input_, nullptr);
  // Skip the left border of the first row so that negative column offsets of
  // up to `border` pixels stay inside the allocation.
  uint8_t *input = input_.get() + border;

  // The warp functions always write rows with widths that are multiples of 8.
  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
  int output_n = ((out_w + 7) & ~7) * out_h;
  std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
  ASSERT_NE(output, nullptr);
  int32_t mat[8];
  int16_t alpha, beta, gamma, delta;
  ConvolveParams conv_params = get_conv_params(0, 0, bd);
  std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow)
                                            CONV_BUF_TYPE[output_n]);
  ASSERT_NE(dsta, nullptr);
  // The last argument (rnd_gen_zeros) is 0: zeros come only from the
  // is_*_zero flags, never from the random draw.
  generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
                        is_alpha_zero, is_beta_zero, is_gamma_zero,
                        is_delta_zero, 0);

  // Fill the image with random pixels, then replicate the first and last
  // pixel of every row into the left and right borders.
  for (int r = 0; r < h; ++r)
    for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand8();
  for (int r = 0; r < h; ++r) {
    memset(input + r * stride - border, input[r * stride], border);
    memset(input + r * stride + w, input[r * stride + (w - 1)], border);
  }

  sub_x = 0;
  sub_y = 0;
  int do_average = 0;

  // Replace the parameters obtained above with ones that use dsta as the
  // destination buffer, then disable distance-weighted compound averaging.
  conv_params =
      get_conv_params_no_round(do_average, 0, dsta.get(), out_w, 1, bd);
  conv_params.use_dist_wtd_comp_avg = 0;

  // Larger output blocks get proportionally fewer iterations.
  const int num_loops = 1000000000 / (out_w + out_h);
  aom_usec_timer timer;
  aom_usec_timer_start(&timer);
  // 32, 32 are passed as p_col / p_row; the block is out_w x out_h with an
  // output stride of out_w.
  for (int i = 0; i < num_loops; ++i)
    test_impl(mat, input, w, h, stride, output.get(), 32, 32, out_w, out_h,
              out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma, delta);

  aom_usec_timer_mark(&timer);
  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
  // The factor of 1000.0 converts the elapsed time to the "ns" printed below
  // (assumes aom_usec_timer_elapsed reports microseconds - TODO confirm).
  printf("warp %3dx%-3d alpha=%d beta=%d gamma=%d delta=%d: %7.2f ns \n", out_w,
         out_h, alpha, beta, gamma, delta, 1000.0 * elapsed_time / num_loops);
}
sub_x < 2; ++sub_x) + for (int sub_y = 0; sub_y < 2; ++sub_y) { + generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta, + is_alpha_zero, is_beta_zero, is_gamma_zero, + is_delta_zero, 1); + + for (int ii = 0; ii < 2; ++ii) { + for (int jj = 0; jj < 5; ++jj) { + for (int do_average = 0; do_average <= 1; ++do_average) { + if (use_no_round) { + conv_params = get_conv_params_no_round( + do_average, 0, dsta.get(), out_w, 1, bd); + } else { + conv_params = get_conv_params(0, 0, bd); + } + if (jj >= 4) { + conv_params.use_dist_wtd_comp_avg = 0; + } else { + conv_params.use_dist_wtd_comp_avg = 1; + conv_params.fwd_offset = quant_dist_lookup_table[jj][ii]; + conv_params.bck_offset = quant_dist_lookup_table[jj][1 - ii]; + } + av1_warp_affine_c(mat, input, w, h, stride, output.get(), 32, 32, + out_w, out_h, out_w, sub_x, sub_y, &conv_params, + alpha, beta, gamma, delta); + if (use_no_round) { + conv_params = get_conv_params_no_round( + do_average, 0, dstb.get(), out_w, 1, bd); + } + if (jj >= 4) { + conv_params.use_dist_wtd_comp_avg = 0; + } else { + conv_params.use_dist_wtd_comp_avg = 1; + conv_params.fwd_offset = quant_dist_lookup_table[jj][ii]; + conv_params.bck_offset = quant_dist_lookup_table[jj][1 - ii]; + } + test_impl(mat, input, w, h, stride, output2.get(), 32, 32, out_w, + out_h, out_w, sub_x, sub_y, &conv_params, alpha, beta, + gamma, delta); + if (use_no_round) { + for (int j = 0; j < out_w * out_h; ++j) + ASSERT_EQ(dsta[j], dstb[j]) + << "Pixel mismatch at index " << j << " = (" + << (j % out_w) << ", " << (j / out_w) << ") on iteration " + << i; + for (int j = 0; j < out_w * out_h; ++j) + ASSERT_EQ(output[j], output2[j]) + << "Pixel mismatch at index " << j << " = (" + << (j % out_w) << ", " << (j / out_w) << ") on iteration " + << i; + } else { + for (int j = 0; j < out_w * out_h; ++j) + ASSERT_EQ(output[j], output2[j]) + << "Pixel mismatch at index " << j << " = (" + << (j % out_w) << ", " << (j / out_w) << ") on iteration " + << i; + } + } + } + } + 
} + } +} +} // namespace AV1WarpFilter + +#if CONFIG_AV1_HIGHBITDEPTH +namespace AV1HighbdWarpFilter { +::testing::internal::ParamGenerator<HighbdWarpTestParams> BuildParams( + highbd_warp_affine_func filter) { + const HighbdWarpTestParam params[] = { + make_tuple(4, 4, 100, 8, filter), make_tuple(8, 8, 100, 8, filter), + make_tuple(64, 64, 100, 8, filter), make_tuple(4, 16, 100, 8, filter), + make_tuple(32, 8, 100, 8, filter), make_tuple(4, 4, 100, 10, filter), + make_tuple(8, 8, 100, 10, filter), make_tuple(64, 64, 100, 10, filter), + make_tuple(4, 16, 100, 10, filter), make_tuple(32, 8, 100, 10, filter), + make_tuple(4, 4, 100, 12, filter), make_tuple(8, 8, 100, 12, filter), + make_tuple(64, 64, 100, 12, filter), make_tuple(4, 16, 100, 12, filter), + make_tuple(32, 8, 100, 12, filter), + }; + return ::testing::Combine(::testing::ValuesIn(params), + ::testing::Values(0, 1), ::testing::Values(0, 1), + ::testing::Values(0, 1), ::testing::Values(0, 1)); +} + +AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() = default; +void AV1HighbdWarpFilterTest::SetUp() { + rnd_.Reset(ACMRandom::DeterministicSeed()); +} + +void AV1HighbdWarpFilterTest::RunSpeedTest(highbd_warp_affine_func test_impl) { + const int w = 128, h = 128; + const int border = 16; + const int stride = w + 2 * border; + HighbdWarpTestParam param = GET_PARAM(0); + const int is_alpha_zero = GET_PARAM(1); + const int is_beta_zero = GET_PARAM(2); + const int is_gamma_zero = GET_PARAM(3); + const int is_delta_zero = GET_PARAM(4); + const int out_w = std::get<0>(param), out_h = std::get<1>(param); + const int bd = std::get<3>(param); + const int mask = (1 << bd) - 1; + int sub_x, sub_y; + + // The warp functions always write rows with widths that are multiples of 8. + // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8. 
+ int output_n = ((out_w + 7) & ~7) * out_h; + std::unique_ptr<uint16_t[]> input_(new (std::nothrow) uint16_t[h * stride]); + ASSERT_NE(input_, nullptr); + uint16_t *input = input_.get() + border; + std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]); + ASSERT_NE(output, nullptr); + int32_t mat[8]; + int16_t alpha, beta, gamma, delta; + ConvolveParams conv_params = get_conv_params(0, 0, bd); + std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow) + CONV_BUF_TYPE[output_n]); + ASSERT_NE(dsta, nullptr); + + generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta, + is_alpha_zero, is_beta_zero, is_gamma_zero, + is_delta_zero, 0); + // Generate an input block and extend its borders horizontally + for (int r = 0; r < h; ++r) + for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask; + for (int r = 0; r < h; ++r) { + for (int c = 0; c < border; ++c) { + input[r * stride - border + c] = input[r * stride]; + input[r * stride + w + c] = input[r * stride + (w - 1)]; + } + } + + sub_x = 0; + sub_y = 0; + int do_average = 0; + conv_params.use_dist_wtd_comp_avg = 0; + conv_params = + get_conv_params_no_round(do_average, 0, dsta.get(), out_w, 1, bd); + + const int num_loops = 1000000000 / (out_w + out_h); + aom_usec_timer timer; + aom_usec_timer_start(&timer); + + for (int i = 0; i < num_loops; ++i) + test_impl(mat, input, w, h, stride, output.get(), 32, 32, out_w, out_h, + out_w, sub_x, sub_y, bd, &conv_params, alpha, beta, gamma, delta); + + aom_usec_timer_mark(&timer); + const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); + printf("highbd warp %3dx%-3d alpha=%d beta=%d gamma=%d delta=%d: %7.2f ns \n", + out_w, out_h, alpha, beta, gamma, delta, + 1000.0 * elapsed_time / num_loops); +} + +void AV1HighbdWarpFilterTest::RunCheckOutput( + highbd_warp_affine_func test_impl) { + const int w = 128, h = 128; + const int border = 16; + const int stride = w + 2 * border; + HighbdWarpTestParam param = GET_PARAM(0); 
+ const int is_alpha_zero = GET_PARAM(1); + const int is_beta_zero = GET_PARAM(2); + const int is_gamma_zero = GET_PARAM(3); + const int is_delta_zero = GET_PARAM(4); + const int out_w = std::get<0>(param), out_h = std::get<1>(param); + const int bd = std::get<3>(param); + const int num_iters = std::get<2>(param); + const int mask = (1 << bd) - 1; + + // The warp functions always write rows with widths that are multiples of 8. + // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8. + int output_n = ((out_w + 7) & ~7) * out_h; + std::unique_ptr<uint16_t[]> input_(new (std::nothrow) uint16_t[h * stride]); + ASSERT_NE(input_, nullptr); + uint16_t *input = input_.get() + border; + std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]); + ASSERT_NE(output, nullptr); + std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]); + ASSERT_NE(output2, nullptr); + int32_t mat[8]; + int16_t alpha, beta, gamma, delta; + ConvolveParams conv_params = get_conv_params(0, 0, bd); + std::unique_ptr<CONV_BUF_TYPE[]> dsta(new (std::nothrow) + CONV_BUF_TYPE[output_n]); + ASSERT_NE(dsta, nullptr); + std::unique_ptr<CONV_BUF_TYPE[]> dstb(new (std::nothrow) + CONV_BUF_TYPE[output_n]); + ASSERT_NE(dstb, nullptr); + for (int i = 0; i < output_n; ++i) output[i] = output2[i] = rnd_.Rand16(); + + for (int i = 0; i < num_iters; ++i) { + // Generate an input block and extend its borders horizontally + for (int r = 0; r < h; ++r) + for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask; + for (int r = 0; r < h; ++r) { + for (int c = 0; c < border; ++c) { + input[r * stride - border + c] = input[r * stride]; + input[r * stride + w + c] = input[r * stride + (w - 1)]; + } + } + const int use_no_round = rnd_.Rand8() & 1; + for (int sub_x = 0; sub_x < 2; ++sub_x) + for (int sub_y = 0; sub_y < 2; ++sub_y) { + generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta, + is_alpha_zero, is_beta_zero, is_gamma_zero, + 
is_delta_zero, 1); + for (int ii = 0; ii < 2; ++ii) { + for (int jj = 0; jj < 5; ++jj) { + for (int do_average = 0; do_average <= 1; ++do_average) { + if (use_no_round) { + conv_params = get_conv_params_no_round( + do_average, 0, dsta.get(), out_w, 1, bd); + } else { + conv_params = get_conv_params(0, 0, bd); + } + if (jj >= 4) { + conv_params.use_dist_wtd_comp_avg = 0; + } else { + conv_params.use_dist_wtd_comp_avg = 1; + conv_params.fwd_offset = quant_dist_lookup_table[jj][ii]; + conv_params.bck_offset = quant_dist_lookup_table[jj][1 - ii]; + } + + av1_highbd_warp_affine_c(mat, input, w, h, stride, output.get(), + 32, 32, out_w, out_h, out_w, sub_x, + sub_y, bd, &conv_params, alpha, beta, + gamma, delta); + if (use_no_round) { + // TODO(angiebird): Change this to test_impl once we have SIMD + // implementation + conv_params = get_conv_params_no_round( + do_average, 0, dstb.get(), out_w, 1, bd); + } + if (jj >= 4) { + conv_params.use_dist_wtd_comp_avg = 0; + } else { + conv_params.use_dist_wtd_comp_avg = 1; + conv_params.fwd_offset = quant_dist_lookup_table[jj][ii]; + conv_params.bck_offset = quant_dist_lookup_table[jj][1 - ii]; + } + test_impl(mat, input, w, h, stride, output2.get(), 32, 32, out_w, + out_h, out_w, sub_x, sub_y, bd, &conv_params, alpha, + beta, gamma, delta); + + if (use_no_round) { + for (int j = 0; j < out_w * out_h; ++j) + ASSERT_EQ(dsta[j], dstb[j]) + << "Pixel mismatch at index " << j << " = (" + << (j % out_w) << ", " << (j / out_w) << ") on iteration " + << i; + for (int j = 0; j < out_w * out_h; ++j) + ASSERT_EQ(output[j], output2[j]) + << "Pixel mismatch at index " << j << " = (" + << (j % out_w) << ", " << (j / out_w) << ") on iteration " + << i; + } else { + for (int j = 0; j < out_w * out_h; ++j) + ASSERT_EQ(output[j], output2[j]) + << "Pixel mismatch at index " << j << " = (" + << (j % out_w) << ", " << (j / out_w) << ") on iteration " + << i; + } + } + } + } + } + } +} +} // namespace AV1HighbdWarpFilter +#endif // 
CONFIG_AV1_HIGHBITDEPTH +} // namespace libaom_test diff --git a/third_party/aom/test/warp_filter_test_util.h b/third_party/aom/test/warp_filter_test_util.h new file mode 100644 index 0000000000..364368ac0c --- /dev/null +++ b/third_party/aom/test/warp_filter_test_util.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AOM_TEST_WARP_FILTER_TEST_UTIL_H_ +#define AOM_TEST_WARP_FILTER_TEST_UTIL_H_ + +#include <tuple> + +#include "config/av1_rtcd.h" +#include "config/aom_dsp_rtcd.h" + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/util.h" +#include "test/register_state_check.h" + +#include "av1/common/mv.h" +#include "av1/common/common_data.h" + +namespace libaom_test { + +void generate_warped_model(libaom_test::ACMRandom *rnd, int32_t *mat, + int16_t *alpha, int16_t *beta, int16_t *gamma, + int16_t *delta, int is_alpha_zero, int is_beta_zero, + int is_gamma_zero, int is_delta_zero); + +namespace AV1WarpFilter { + +typedef void (*warp_affine_func)(const int32_t *mat, const uint8_t *ref, + int width, int height, int stride, + uint8_t *pred, int p_col, int p_row, + int p_width, int p_height, int p_stride, + int subsampling_x, int subsampling_y, + ConvolveParams *conv_params, int16_t alpha, + int16_t beta, int16_t gamma, int16_t delta); + +typedef std::tuple<int, int, int, warp_affine_func> WarpTestParam; +typedef std::tuple<WarpTestParam, int, int, int, int> WarpTestParams; + 
+::testing::internal::ParamGenerator<WarpTestParams> BuildParams( + warp_affine_func filter); + +class AV1WarpFilterTest : public ::testing::TestWithParam<WarpTestParams> { + public: + ~AV1WarpFilterTest() override; + void SetUp() override; + + protected: + void RunCheckOutput(warp_affine_func test_impl); + void RunSpeedTest(warp_affine_func test_impl); + + libaom_test::ACMRandom rnd_; +}; + +} // namespace AV1WarpFilter + +#if CONFIG_AV1_HIGHBITDEPTH +namespace AV1HighbdWarpFilter { +typedef void (*highbd_warp_affine_func)(const int32_t *mat, const uint16_t *ref, + int width, int height, int stride, + uint16_t *pred, int p_col, int p_row, + int p_width, int p_height, int p_stride, + int subsampling_x, int subsampling_y, + int bd, ConvolveParams *conv_params, + int16_t alpha, int16_t beta, + int16_t gamma, int16_t delta); + +typedef std::tuple<int, int, int, int, highbd_warp_affine_func> + HighbdWarpTestParam; +typedef std::tuple<HighbdWarpTestParam, int, int, int, int> + HighbdWarpTestParams; + +::testing::internal::ParamGenerator<HighbdWarpTestParams> BuildParams( + highbd_warp_affine_func filter); + +class AV1HighbdWarpFilterTest + : public ::testing::TestWithParam<HighbdWarpTestParams> { + public: + ~AV1HighbdWarpFilterTest() override; + void SetUp() override; + + protected: + void RunCheckOutput(highbd_warp_affine_func test_impl); + void RunSpeedTest(highbd_warp_affine_func test_impl); + + libaom_test::ACMRandom rnd_; +}; + +} // namespace AV1HighbdWarpFilter +#endif // CONFIG_AV1_HIGHBITDEPTH + +} // namespace libaom_test + +#endif // AOM_TEST_WARP_FILTER_TEST_UTIL_H_ diff --git a/third_party/aom/test/webm_video_source.h b/third_party/aom/test/webm_video_source.h new file mode 100644 index 0000000000..845abd6dce --- /dev/null +++ b/third_party/aom/test/webm_video_source.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#ifndef AOM_TEST_WEBM_VIDEO_SOURCE_H_ +#define AOM_TEST_WEBM_VIDEO_SOURCE_H_ +#include <cstdarg> +#include <cstdio> +#include <cstdlib> +#include <new> +#include <string> +#include "common/tools_common.h" +#include "common/webmdec.h" +#include "test/video_source.h" + +namespace libaom_test { + +// This class extends VideoSource to allow parsing of WebM files, +// so that we can do actual file decodes. +class WebMVideoSource : public CompressedVideoSource { + public: + explicit WebMVideoSource(const std::string &file_name) + : file_name_(file_name), aom_ctx_(new AvxInputContext()), + webm_ctx_(new WebmInputContext()), buf_(nullptr), buf_sz_(0), + frame_sz_(0), frame_number_(0), end_of_file_(false) {} + + ~WebMVideoSource() override { + if (aom_ctx_->file != nullptr) fclose(aom_ctx_->file); + webm_free(webm_ctx_); + delete aom_ctx_; + delete webm_ctx_; + } + + void Init() override { + ASSERT_NE(aom_ctx_, nullptr); + ASSERT_NE(webm_ctx_, nullptr); + } + + void Begin() override { + ASSERT_NE(aom_ctx_, nullptr); + ASSERT_NE(webm_ctx_, nullptr); + aom_ctx_->file = OpenTestDataFile(file_name_); + ASSERT_NE(aom_ctx_->file, nullptr) + << "Input file open failed. 
Filename: " << file_name_; + + ASSERT_EQ(file_is_webm(webm_ctx_, aom_ctx_), 1) << "file is not WebM"; + + FillFrame(); + } + + void Next() override { + ++frame_number_; + FillFrame(); + } + + void FillFrame() { + ASSERT_NE(aom_ctx_, nullptr); + ASSERT_NE(webm_ctx_, nullptr); + ASSERT_NE(aom_ctx_->file, nullptr); + const int status = webm_read_frame(webm_ctx_, &buf_, &frame_sz_, &buf_sz_); + ASSERT_GE(status, 0) << "webm_read_frame failed"; + if (status == 1) { + end_of_file_ = true; + } + } + + void SeekToNextKeyFrame() { + ASSERT_NE(aom_ctx_, nullptr); + ASSERT_NE(webm_ctx_, nullptr); + ASSERT_NE(aom_ctx_->file, nullptr); + do { + const int status = + webm_read_frame(webm_ctx_, &buf_, &frame_sz_, &buf_sz_); + ASSERT_GE(status, 0) << "webm_read_frame failed"; + ++frame_number_; + if (status == 1) { + end_of_file_ = true; + } + } while (!webm_ctx_->is_key_frame && !end_of_file_); + } + + const uint8_t *cxdata() const override { + return end_of_file_ ? nullptr : buf_; + } + size_t frame_size() const override { return frame_sz_; } + unsigned int frame_number() const override { return frame_number_; } + + protected: + std::string file_name_; + AvxInputContext *aom_ctx_; + WebmInputContext *webm_ctx_; + uint8_t *buf_; // Owned by webm_ctx_ and freed when webm_ctx_ is freed. + size_t buf_sz_; + size_t frame_sz_; + unsigned int frame_number_; + bool end_of_file_; +}; + +} // namespace libaom_test + +#endif // AOM_TEST_WEBM_VIDEO_SOURCE_H_ diff --git a/third_party/aom/test/webmenc_test.cc b/third_party/aom/test/webmenc_test.cc new file mode 100644 index 0000000000..acd795f2ec --- /dev/null +++ b/third_party/aom/test/webmenc_test.cc @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <string> +#include "common/webmenc.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +#if CONFIG_WEBM_IO + +class WebmencTest : public ::testing::Test {}; + +// All of these variations on output should be identical. +TEST(WebmencTest, ExtractEncoderSettingsOutput1) { + const char *argv[] = { "aomenc", "-o", "output", "input", + "--target-bitrate=300" }; + int argc = 5; + const std::string expected("version:1.2.3 --target-bitrate=300"); + char *result = extract_encoder_settings("1.2.3", argv, argc, "input"); + ASSERT_EQ(expected, std::string(result)); + free(result); +} + +TEST(WebmencTest, ExtractEncoderSettingsOutput2) { + const char *argv[] = { "aomenc", "--output", "bar", "foo", "--cpu-used=3" }; + int argc = 5; + const std::string expected("version:abc --cpu-used=3"); + char *result = extract_encoder_settings("abc", argv, argc, "foo"); + ASSERT_EQ(expected, std::string(result)); + free(result); +} + +TEST(WebmencTest, ExtractEncoderSettingsOutput3) { + const char *argv[] = { "aomenc", "--cq-level=63", "--end-usage=q", + "--output=foo", "baz" }; + int argc = 5; + const std::string expected("version:23 --cq-level=63 --end-usage=q"); + char *result = extract_encoder_settings("23", argv, argc, "baz"); + ASSERT_EQ(expected, std::string(result)); + free(result); +} + +TEST(WebmencTest, ExtractEncoderSettingsInput) { + // Check that input filename is filtered regardless of position. 
+ const char *argv[] = { "aomenc", "-o", "out", "input", "-p", "2" }; + int argc = 6; + const char version[] = "1.0.0"; + const std::string expected("version:1.0.0 -p 2"); + char *result = extract_encoder_settings(version, argv, argc, "input"); + ASSERT_EQ(expected, std::string(result)); + free(result); + + const char *argv2[] = { "aomenc", "input", "-o", "out", "-p", "2" }; + result = extract_encoder_settings(version, argv2, argc, "input"); + ASSERT_EQ(expected, std::string(result)); + free(result); +} + +#endif // CONFIG_WEBM_IO +} // namespace diff --git a/third_party/aom/test/wiener_test.cc b/third_party/aom/test/wiener_test.cc new file mode 100644 index 0000000000..7eb6372aaa --- /dev/null +++ b/third_party/aom/test/wiener_test.cc @@ -0,0 +1,1390 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <tuple> +#include <utility> +#include <vector> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +#include "test/register_state_check.h" +#include "test/acm_random.h" +#include "test/util.h" + +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" + +#include "aom/aom_integer.h" +#include "aom_ports/aom_timer.h" +#include "av1/encoder/pickrst.h" + +#define MAX_WIENER_BLOCK 384 +#define MAX_DATA_BLOCK (MAX_WIENER_BLOCK + WIENER_WIN) + +// 8-bit-depth tests +namespace wiener_lowbd { + +// C implementation of the algorithm implemented by the SIMD code. +// This is a little more efficient than the version in av1_compute_stats_c(). 
+static void compute_stats_win_opt_c(int wiener_win, const uint8_t *dgd, + const uint8_t *src, int16_t *d, int16_t *s, + int h_start, int h_end, int v_start, + int v_end, int dgd_stride, int src_stride, + int64_t *M, int64_t *H, + int use_downsampled_wiener_stats) { + ASSERT_TRUE(wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA); + (void)d; + (void)s; + int i, j, k, l, m, n; + const int pixel_count = (h_end - h_start) * (v_end - v_start); + const int wiener_win2 = wiener_win * wiener_win; + const int wiener_halfwin = (wiener_win >> 1); + uint8_t avg = find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride); + int downsample_factor = + use_downsampled_wiener_stats ? WIENER_STATS_DOWNSAMPLE_FACTOR : 1; + + std::vector<std::vector<int64_t> > M_int(wiener_win, + std::vector<int64_t>(wiener_win, 0)); + std::vector<std::vector<int64_t> > H_int( + wiener_win * wiener_win, std::vector<int64_t>(wiener_win * 8, 0)); + std::vector<std::vector<int32_t> > sumY(wiener_win, + std::vector<int32_t>(wiener_win, 0)); + int32_t sumX = 0; + const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin; + + // Main loop handles two pixels at a time + // We can assume that h_start is even, since it will always be aligned to + // a tile edge + some number of restoration units, and both of those will + // be 64-pixel aligned. + // However, at the edge of the image, h_end may be odd, so we need to handle + // that case correctly. 
+ assert(h_start % 2 == 0); + for (i = v_start; i < v_end; i = i + downsample_factor) { + if (use_downsampled_wiener_stats && + (v_end - i < WIENER_STATS_DOWNSAMPLE_FACTOR)) { + downsample_factor = v_end - i; + } + int32_t sumX_row_i32 = 0; + std::vector<std::vector<int32_t> > sumY_row( + wiener_win, std::vector<int32_t>(wiener_win, 0)); + std::vector<std::vector<int32_t> > M_row_i32( + wiener_win, std::vector<int32_t>(wiener_win, 0)); + std::vector<std::vector<int32_t> > H_row_i32( + wiener_win * wiener_win, std::vector<int32_t>(wiener_win * 8, 0)); + const int h_end_even = h_end & ~1; + const int has_odd_pixel = h_end & 1; + for (j = h_start; j < h_end_even; j += 2) { + const uint8_t X1 = src[i * src_stride + j]; + const uint8_t X2 = src[i * src_stride + j + 1]; + sumX_row_i32 += X1 + X2; + + const uint8_t *dgd_ij = dgd_win + i * dgd_stride + j; + for (k = 0; k < wiener_win; k++) { + for (l = 0; l < wiener_win; l++) { + const uint8_t *dgd_ijkl = dgd_ij + k * dgd_stride + l; + int32_t *H_int_temp = &H_row_i32[(l * wiener_win + k)][0]; + const uint8_t D1 = dgd_ijkl[0]; + const uint8_t D2 = dgd_ijkl[1]; + sumY_row[k][l] += D1 + D2; + M_row_i32[l][k] += D1 * X1 + D2 * X2; + for (m = 0; m < wiener_win; m++) { + for (n = 0; n < wiener_win; n++) { + H_int_temp[m * 8 + n] += D1 * dgd_ij[n + dgd_stride * m] + + D2 * dgd_ij[n + dgd_stride * m + 1]; + } + } + } + } + } + // If the width is odd, add in the final pixel + if (has_odd_pixel) { + const uint8_t X1 = src[i * src_stride + j]; + sumX_row_i32 += X1; + + const uint8_t *dgd_ij = dgd_win + i * dgd_stride + j; + for (k = 0; k < wiener_win; k++) { + for (l = 0; l < wiener_win; l++) { + const uint8_t *dgd_ijkl = dgd_ij + k * dgd_stride + l; + int32_t *H_int_temp = &H_row_i32[(l * wiener_win + k)][0]; + const uint8_t D1 = dgd_ijkl[0]; + sumY_row[k][l] += D1; + M_row_i32[l][k] += D1 * X1; + for (m = 0; m < wiener_win; m++) { + for (n = 0; n < wiener_win; n++) { + H_int_temp[m * 8 + n] += D1 * dgd_ij[n + dgd_stride * m]; + } 
+ } + } + } + } + + sumX += sumX_row_i32 * downsample_factor; + // Scale M matrix based on the downsampling factor + for (k = 0; k < wiener_win; ++k) { + for (l = 0; l < wiener_win; ++l) { + sumY[k][l] += sumY_row[k][l] * downsample_factor; + M_int[k][l] += (int64_t)M_row_i32[k][l] * downsample_factor; + } + } + // Scale H matrix based on the downsampling factor + for (k = 0; k < wiener_win * wiener_win; ++k) { + for (l = 0; l < wiener_win * 8; ++l) { + H_int[k][l] += (int64_t)H_row_i32[k][l] * downsample_factor; + } + } + } + + const int64_t avg_square_sum = (int64_t)avg * (int64_t)avg * pixel_count; + for (k = 0; k < wiener_win; k++) { + for (l = 0; l < wiener_win; l++) { + M[l * wiener_win + k] = + M_int[l][k] + avg_square_sum - (int64_t)avg * (sumX + sumY[k][l]); + for (m = 0; m < wiener_win; m++) { + for (n = 0; n < wiener_win; n++) { + H[(l * wiener_win + k) * wiener_win2 + m * wiener_win + n] = + H_int[(l * wiener_win + k)][n * 8 + m] + avg_square_sum - + (int64_t)avg * (sumY[k][l] + sumY[n][m]); + } + } + } + } +} + +void compute_stats_opt_c(int wiener_win, const uint8_t *dgd, const uint8_t *src, + int16_t *d, int16_t *s, int h_start, int h_end, + int v_start, int v_end, int dgd_stride, int src_stride, + int64_t *M, int64_t *H, + int use_downsampled_wiener_stats) { + if (wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA) { + compute_stats_win_opt_c(wiener_win, dgd, src, d, s, h_start, h_end, v_start, + v_end, dgd_stride, src_stride, M, H, + use_downsampled_wiener_stats); + } else { + av1_compute_stats_c(wiener_win, dgd, src, d, s, h_start, h_end, v_start, + v_end, dgd_stride, src_stride, M, H, + use_downsampled_wiener_stats); + } +} + +static const int kIterations = 100; +typedef void (*compute_stats_Func)(int wiener_win, const uint8_t *dgd, + const uint8_t *src, int16_t *dgd_avg, + int16_t *src_avg, int h_start, int h_end, + int v_start, int v_end, int dgd_stride, + int src_stride, int64_t *M, int64_t *H, + int use_downsampled_wiener_stats); + 
+//////////////////////////////////////////////////////////////////////////////// +// 8 bit +//////////////////////////////////////////////////////////////////////////////// + +typedef std::tuple<const compute_stats_Func> WienerTestParam; + +class WienerTest : public ::testing::TestWithParam<WienerTestParam> { + public: + void SetUp() override { + src_buf = (uint8_t *)aom_memalign( + 32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*src_buf)); + ASSERT_NE(src_buf, nullptr); + dgd_buf = (uint8_t *)aom_memalign( + 32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*dgd_buf)); + ASSERT_NE(dgd_buf, nullptr); + const int buf_size = + sizeof(*buf) * 6 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX; + buf = (int16_t *)aom_memalign(32, buf_size); + ASSERT_NE(buf, nullptr); + memset(buf, 0, buf_size); + target_func_ = GET_PARAM(0); + } + void TearDown() override { + aom_free(src_buf); + aom_free(dgd_buf); + aom_free(buf); + } + void RunWienerTest(const int32_t wiener_win, int32_t run_times); + void RunWienerTest_ExtremeValues(const int32_t wiener_win); + + private: + compute_stats_Func target_func_; + libaom_test::ACMRandom rng_; + uint8_t *src_buf; + uint8_t *dgd_buf; + int16_t *buf; +}; + +void WienerTest::RunWienerTest(const int32_t wiener_win, int32_t run_times) { + const int32_t wiener_halfwin = wiener_win >> 1; + const int32_t wiener_win2 = wiener_win * wiener_win; + DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]); + // Note(rachelbarker): + // The SIMD code requires `h_start` to be even, but can otherwise + // deal with any values of `h_end`, `v_start`, `v_end`. We cover this + // entire range, even though (at the time of writing) `h_start` and `v_start` + // will always be multiples of 64 when called from non-test code. 
+ // If in future any new requirements are added, these lines will + // need changing. + int h_start = (rng_.Rand16() % (MAX_WIENER_BLOCK / 2)) & ~1; + int h_end = run_times != 1 ? 256 : (rng_.Rand16() % MAX_WIENER_BLOCK); + if (h_start > h_end) std::swap(h_start, h_end); + int v_start = rng_.Rand16() % (MAX_WIENER_BLOCK / 2); + int v_end = run_times != 1 ? 256 : (rng_.Rand16() % MAX_WIENER_BLOCK); + if (v_start > v_end) std::swap(v_start, v_end); + const int dgd_stride = h_end; + const int src_stride = MAX_DATA_BLOCK; + const int iters = run_times == 1 ? kIterations : 2; + const int max_value_downsample_stats = 1; + int16_t *dgd_avg = buf; + int16_t *src_avg = + buf + (3 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX); + + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_buf[i] = rng_.Rand8(); + src_buf[i] = rng_.Rand8(); + } + uint8_t *dgd = dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin; + uint8_t *src = src_buf; + for (int use_downsampled_stats = 0; + use_downsampled_stats <= max_value_downsample_stats; + use_downsampled_stats++) { + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + av1_compute_stats_c(wiener_win, dgd, src, dgd_avg, src_avg, h_start, + h_end, v_start, v_end, dgd_stride, src_stride, + M_ref, H_ref, use_downsampled_stats); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + target_func_(wiener_win, dgd, src, dgd_avg, src_avg, h_start, h_end, + v_start, v_end, dgd_stride, src_stride, M_test, H_test, + use_downsampled_stats); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + if (run_times > 10) { + printf("win %d %3dx%-3d:%7.2f/%7.2fns", wiener_win, h_end, v_end, time1, + time2); + printf("(%3.2f)\n", time1 / 
time2); + } + int failed = 0; + for (int i = 0; i < wiener_win2; ++i) { + if (M_ref[i] != M_test[i]) { + failed = 1; + printf("win %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n", + wiener_win, iter, i, M_ref[i], M_test[i]); + break; + } + } + for (int i = 0; i < wiener_win2 * wiener_win2; ++i) { + if (H_ref[i] != H_test[i]) { + failed = 1; + printf("win %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n", + wiener_win, iter, i, H_ref[i], H_test[i]); + break; + } + } + ASSERT_EQ(failed, 0); + } + } +} + +void WienerTest::RunWienerTest_ExtremeValues(const int32_t wiener_win) { + const int32_t wiener_halfwin = wiener_win >> 1; + const int32_t wiener_win2 = wiener_win * wiener_win; + DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]); + const int h_start = 16; + const int h_end = MAX_WIENER_BLOCK; + const int v_start = 16; + const int v_end = MAX_WIENER_BLOCK; + const int dgd_stride = h_end; + const int src_stride = MAX_DATA_BLOCK; + const int iters = 1; + const int max_value_downsample_stats = 1; + int16_t *dgd_avg = buf; + int16_t *src_avg = + buf + (3 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX); + + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + // Fill with alternating extreme values to maximize difference with + // the average. + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_buf[i] = i & 1 ? 255 : 0; + src_buf[i] = i & 1 ? 
255 : 0; + } + uint8_t *dgd = dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin; + uint8_t *src = src_buf; + for (int use_downsampled_stats = 0; + use_downsampled_stats <= max_value_downsample_stats; + use_downsampled_stats++) { + av1_compute_stats_c(wiener_win, dgd, src, dgd_avg, src_avg, h_start, + h_end, v_start, v_end, dgd_stride, src_stride, M_ref, + H_ref, use_downsampled_stats); + + target_func_(wiener_win, dgd, src, dgd_avg, src_avg, h_start, h_end, + v_start, v_end, dgd_stride, src_stride, M_test, H_test, + use_downsampled_stats); + + int failed = 0; + for (int i = 0; i < wiener_win2; ++i) { + if (M_ref[i] != M_test[i]) { + failed = 1; + printf("win %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n", + wiener_win, iter, i, M_ref[i], M_test[i]); + break; + } + } + for (int i = 0; i < wiener_win2 * wiener_win2; ++i) { + if (H_ref[i] != H_test[i]) { + failed = 1; + printf("win %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64 " \n", + wiener_win, iter, i, H_ref[i], H_test[i]); + break; + } + } + ASSERT_EQ(failed, 0); + } + } +} + +TEST_P(WienerTest, RandomValues) { + RunWienerTest(WIENER_WIN, 1); + RunWienerTest(WIENER_WIN_CHROMA, 1); +} + +TEST_P(WienerTest, ExtremeValues) { + RunWienerTest_ExtremeValues(WIENER_WIN); + RunWienerTest_ExtremeValues(WIENER_WIN_CHROMA); +} + +TEST_P(WienerTest, DISABLED_Speed) { + RunWienerTest(WIENER_WIN, 200); + RunWienerTest(WIENER_WIN_CHROMA, 200); +} + +INSTANTIATE_TEST_SUITE_P(C, WienerTest, ::testing::Values(compute_stats_opt_c)); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P(SSE4_1, WienerTest, + ::testing::Values(av1_compute_stats_sse4_1)); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 + +INSTANTIATE_TEST_SUITE_P(AVX2, WienerTest, + ::testing::Values(av1_compute_stats_avx2)); +#endif // HAVE_AVX2 + +#if HAVE_NEON + +INSTANTIATE_TEST_SUITE_P(NEON, WienerTest, + ::testing::Values(av1_compute_stats_neon)); +#endif // HAVE_NEON + +} // namespace wiener_lowbd + +#if CONFIG_AV1_HIGHBITDEPTH +// High bit-depth 
tests: +namespace wiener_highbd { + +static void compute_stats_highbd_win_opt_c(int wiener_win, const uint8_t *dgd8, + const uint8_t *src8, int h_start, + int h_end, int v_start, int v_end, + int dgd_stride, int src_stride, + int64_t *M, int64_t *H, + aom_bit_depth_t bit_depth) { + ASSERT_TRUE(wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA); + int i, j, k, l, m, n; + const int pixel_count = (h_end - h_start) * (v_end - v_start); + const int wiener_win2 = wiener_win * wiener_win; + const int wiener_halfwin = (wiener_win >> 1); + const uint16_t *src = CONVERT_TO_SHORTPTR(src8); + const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8); + const uint16_t avg = + find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride); + + std::vector<std::vector<int64_t> > M_int(wiener_win, + std::vector<int64_t>(wiener_win, 0)); + std::vector<std::vector<int64_t> > H_int( + wiener_win * wiener_win, std::vector<int64_t>(wiener_win * 8, 0)); + std::vector<std::vector<int32_t> > sumY(wiener_win, + std::vector<int32_t>(wiener_win, 0)); + + memset(M, 0, sizeof(*M) * wiener_win2); + memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2); + + int64_t sumX = 0; + const uint16_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin; + + // Main loop handles two pixels at a time + // We can assume that h_start is even, since it will always be aligned to + // a tile edge + some number of restoration units, and both of those will + // be 64-pixel aligned. + // However, at the edge of the image, h_end may be odd, so we need to handle + // that case correctly. 
+ assert(h_start % 2 == 0); + for (i = v_start; i < v_end; i++) { + const int h_end_even = h_end & ~1; + const int has_odd_pixel = h_end & 1; + for (j = h_start; j < h_end_even; j += 2) { + const uint16_t X1 = src[i * src_stride + j]; + const uint16_t X2 = src[i * src_stride + j + 1]; + sumX += X1 + X2; + + const uint16_t *dgd_ij = dgd_win + i * dgd_stride + j; + for (k = 0; k < wiener_win; k++) { + for (l = 0; l < wiener_win; l++) { + const uint16_t *dgd_ijkl = dgd_ij + k * dgd_stride + l; + int64_t *H_int_temp = &H_int[(l * wiener_win + k)][0]; + const uint16_t D1 = dgd_ijkl[0]; + const uint16_t D2 = dgd_ijkl[1]; + sumY[k][l] += D1 + D2; + M_int[l][k] += D1 * X1 + D2 * X2; + for (m = 0; m < wiener_win; m++) { + for (n = 0; n < wiener_win; n++) { + H_int_temp[m * 8 + n] += D1 * dgd_ij[n + dgd_stride * m] + + D2 * dgd_ij[n + dgd_stride * m + 1]; + } + } + } + } + } + // If the width is odd, add in the final pixel + if (has_odd_pixel) { + const uint16_t X1 = src[i * src_stride + j]; + sumX += X1; + + const uint16_t *dgd_ij = dgd_win + i * dgd_stride + j; + for (k = 0; k < wiener_win; k++) { + for (l = 0; l < wiener_win; l++) { + const uint16_t *dgd_ijkl = dgd_ij + k * dgd_stride + l; + int64_t *H_int_temp = &H_int[(l * wiener_win + k)][0]; + const uint16_t D1 = dgd_ijkl[0]; + sumY[k][l] += D1; + M_int[l][k] += D1 * X1; + for (m = 0; m < wiener_win; m++) { + for (n = 0; n < wiener_win; n++) { + H_int_temp[m * 8 + n] += D1 * dgd_ij[n + dgd_stride * m]; + } + } + } + } + } + } + + uint8_t bit_depth_divider = 1; + if (bit_depth == AOM_BITS_12) + bit_depth_divider = 16; + else if (bit_depth == AOM_BITS_10) + bit_depth_divider = 4; + + const int64_t avg_square_sum = (int64_t)avg * (int64_t)avg * pixel_count; + for (k = 0; k < wiener_win; k++) { + for (l = 0; l < wiener_win; l++) { + M[l * wiener_win + k] = + (M_int[l][k] + + (avg_square_sum - (int64_t)avg * (sumX + sumY[k][l]))) / + bit_depth_divider; + for (m = 0; m < wiener_win; m++) { + for (n = 0; n < wiener_win; n++) 
{ + H[(l * wiener_win + k) * wiener_win2 + m * wiener_win + n] = + (H_int[(l * wiener_win + k)][n * 8 + m] + + (avg_square_sum - (int64_t)avg * (sumY[k][l] + sumY[n][m]))) / + bit_depth_divider; + } + } + } + } +} + +void compute_stats_highbd_opt_c(int wiener_win, const uint8_t *dgd, + const uint8_t *src, int h_start, int h_end, + int v_start, int v_end, int dgd_stride, + int src_stride, int64_t *M, int64_t *H, + aom_bit_depth_t bit_depth) { + if (wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA) { + compute_stats_highbd_win_opt_c(wiener_win, dgd, src, h_start, h_end, + v_start, v_end, dgd_stride, src_stride, M, H, + bit_depth); + } else { + av1_compute_stats_highbd_c(wiener_win, dgd, src, h_start, h_end, v_start, + v_end, dgd_stride, src_stride, M, H, bit_depth); + } +} + +static const int kIterations = 100; +typedef void (*compute_stats_Func)(int wiener_win, const uint8_t *dgd, + const uint8_t *src, int h_start, int h_end, + int v_start, int v_end, int dgd_stride, + int src_stride, int64_t *M, int64_t *H, + aom_bit_depth_t bit_depth); + +typedef std::tuple<const compute_stats_Func> WienerTestParam; + +class WienerTestHighbd : public ::testing::TestWithParam<WienerTestParam> { + public: + void SetUp() override { + src_buf = (uint16_t *)aom_memalign( + 32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*src_buf)); + ASSERT_NE(src_buf, nullptr); + dgd_buf = (uint16_t *)aom_memalign( + 32, MAX_DATA_BLOCK * MAX_DATA_BLOCK * sizeof(*dgd_buf)); + ASSERT_NE(dgd_buf, nullptr); + target_func_ = GET_PARAM(0); + } + void TearDown() override { + aom_free(src_buf); + aom_free(dgd_buf); + } + void RunWienerTest(const int32_t wiener_win, int32_t run_times, + aom_bit_depth_t bit_depth); + void RunWienerTest_ExtremeValues(const int32_t wiener_win, + aom_bit_depth_t bit_depth); + + private: + compute_stats_Func target_func_; + libaom_test::ACMRandom rng_; + uint16_t *src_buf; + uint16_t *dgd_buf; +}; + +void WienerTestHighbd::RunWienerTest(const int32_t wiener_win, + int32_t 
run_times, + aom_bit_depth_t bit_depth) { + const int32_t wiener_halfwin = wiener_win >> 1; + const int32_t wiener_win2 = wiener_win * wiener_win; + DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]); + // Note(rachelbarker): + // The SIMD code requires `h_start` to be even, but can otherwise + // deal with any values of `h_end`, `v_start`, `v_end`. We cover this + // entire range, even though (at the time of writing) `h_start` and `v_start` + // will always be multiples of 64 when called from non-test code. + // If in future any new requirements are added, these lines will + // need changing. + int h_start = (rng_.Rand16() % (MAX_WIENER_BLOCK / 2)) & ~1; + int h_end = run_times != 1 ? 256 : (rng_.Rand16() % MAX_WIENER_BLOCK); + if (h_start > h_end) std::swap(h_start, h_end); + int v_start = rng_.Rand16() % (MAX_WIENER_BLOCK / 2); + int v_end = run_times != 1 ? 256 : (rng_.Rand16() % MAX_WIENER_BLOCK); + if (v_start > v_end) std::swap(v_start, v_end); + const int dgd_stride = h_end; + const int src_stride = MAX_DATA_BLOCK; + const int iters = run_times == 1 ? 
kIterations : 2; + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_buf[i] = rng_.Rand16() % (1 << bit_depth); + src_buf[i] = rng_.Rand16() % (1 << bit_depth); + } + const uint8_t *dgd8 = CONVERT_TO_BYTEPTR( + dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin); + const uint8_t *src8 = CONVERT_TO_BYTEPTR(src_buf); + + aom_usec_timer timer; + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + av1_compute_stats_highbd_c(wiener_win, dgd8, src8, h_start, h_end, + v_start, v_end, dgd_stride, src_stride, M_ref, + H_ref, bit_depth); + } + aom_usec_timer_mark(&timer); + const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + aom_usec_timer_start(&timer); + for (int i = 0; i < run_times; ++i) { + target_func_(wiener_win, dgd8, src8, h_start, h_end, v_start, v_end, + dgd_stride, src_stride, M_test, H_test, bit_depth); + } + aom_usec_timer_mark(&timer); + const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); + if (run_times > 10) { + printf("win %d bd %d %3dx%-3d:%7.2f/%7.2fns", wiener_win, bit_depth, + h_end, v_end, time1, time2); + printf("(%3.2f)\n", time1 / time2); + } + int failed = 0; + for (int i = 0; i < wiener_win2; ++i) { + if (M_ref[i] != M_test[i]) { + failed = 1; + printf("win %d bd %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64 + " \n", + wiener_win, bit_depth, iter, i, M_ref[i], M_test[i]); + break; + } + } + for (int i = 0; i < wiener_win2 * wiener_win2; ++i) { + if (H_ref[i] != H_test[i]) { + failed = 1; + printf("win %d bd %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64 + " \n", + wiener_win, bit_depth, iter, i, H_ref[i], H_test[i]); + break; + } + } + ASSERT_EQ(failed, 0); + } +} + +void WienerTestHighbd::RunWienerTest_ExtremeValues(const int32_t wiener_win, + aom_bit_depth_t bit_depth) { + const int32_t wiener_halfwin = wiener_win >> 1; + const int32_t wiener_win2 = wiener_win * wiener_win; + 
DECLARE_ALIGNED(32, int64_t, M_ref[WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, H_ref[WIENER_WIN2 * WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, M_test[WIENER_WIN2]); + DECLARE_ALIGNED(32, int64_t, H_test[WIENER_WIN2 * WIENER_WIN2]); + const int h_start = 16; + const int h_end = MAX_WIENER_BLOCK; + const int v_start = 16; + const int v_end = MAX_WIENER_BLOCK; + const int dgd_stride = h_end; + const int src_stride = MAX_DATA_BLOCK; + const int iters = 1; + for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) { + // Fill with alternating extreme values to maximize difference with + // the average. + for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) { + dgd_buf[i] = i & 1 ? ((uint16_t)1 << bit_depth) - 1 : 0; + src_buf[i] = i & 1 ? ((uint16_t)1 << bit_depth) - 1 : 0; + } + const uint8_t *dgd8 = CONVERT_TO_BYTEPTR( + dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin); + const uint8_t *src8 = CONVERT_TO_BYTEPTR(src_buf); + + av1_compute_stats_highbd_c(wiener_win, dgd8, src8, h_start, h_end, v_start, + v_end, dgd_stride, src_stride, M_ref, H_ref, + bit_depth); + + target_func_(wiener_win, dgd8, src8, h_start, h_end, v_start, v_end, + dgd_stride, src_stride, M_test, H_test, bit_depth); + + int failed = 0; + for (int i = 0; i < wiener_win2; ++i) { + if (M_ref[i] != M_test[i]) { + failed = 1; + printf("win %d bd %d M iter %d [%4d] ref %6" PRId64 " test %6" PRId64 + " \n", + wiener_win, bit_depth, iter, i, M_ref[i], M_test[i]); + break; + } + } + for (int i = 0; i < wiener_win2 * wiener_win2; ++i) { + if (H_ref[i] != H_test[i]) { + failed = 1; + printf("win %d bd %d H iter %d [%4d] ref %6" PRId64 " test %6" PRId64 + " \n", + wiener_win, bit_depth, iter, i, H_ref[i], H_test[i]); + break; + } + } + ASSERT_EQ(failed, 0); + } +} + +TEST_P(WienerTestHighbd, RandomValues) { + RunWienerTest(WIENER_WIN, 1, AOM_BITS_8); + RunWienerTest(WIENER_WIN_CHROMA, 1, AOM_BITS_8); + RunWienerTest(WIENER_WIN, 1, AOM_BITS_10); + RunWienerTest(WIENER_WIN_CHROMA, 1, 
AOM_BITS_10); + RunWienerTest(WIENER_WIN, 1, AOM_BITS_12); + RunWienerTest(WIENER_WIN_CHROMA, 1, AOM_BITS_12); +} + +TEST_P(WienerTestHighbd, ExtremeValues) { + RunWienerTest_ExtremeValues(WIENER_WIN, AOM_BITS_8); + RunWienerTest_ExtremeValues(WIENER_WIN_CHROMA, AOM_BITS_8); + RunWienerTest_ExtremeValues(WIENER_WIN, AOM_BITS_10); + RunWienerTest_ExtremeValues(WIENER_WIN_CHROMA, AOM_BITS_10); + RunWienerTest_ExtremeValues(WIENER_WIN, AOM_BITS_12); + RunWienerTest_ExtremeValues(WIENER_WIN_CHROMA, AOM_BITS_12); +} + +TEST_P(WienerTestHighbd, DISABLED_Speed) { + RunWienerTest(WIENER_WIN, 200, AOM_BITS_8); + RunWienerTest(WIENER_WIN_CHROMA, 200, AOM_BITS_8); + RunWienerTest(WIENER_WIN, 200, AOM_BITS_10); + RunWienerTest(WIENER_WIN_CHROMA, 200, AOM_BITS_10); + RunWienerTest(WIENER_WIN, 200, AOM_BITS_12); + RunWienerTest(WIENER_WIN_CHROMA, 200, AOM_BITS_12); +} + +INSTANTIATE_TEST_SUITE_P(C, WienerTestHighbd, + ::testing::Values(compute_stats_highbd_opt_c)); + +#if HAVE_SSE4_1 +INSTANTIATE_TEST_SUITE_P(SSE4_1, WienerTestHighbd, + ::testing::Values(av1_compute_stats_highbd_sse4_1)); +#endif // HAVE_SSE4_1 + +#if HAVE_AVX2 +INSTANTIATE_TEST_SUITE_P(AVX2, WienerTestHighbd, + ::testing::Values(av1_compute_stats_highbd_avx2)); +#endif // HAVE_AVX2 + +#if HAVE_NEON +INSTANTIATE_TEST_SUITE_P(NEON, WienerTestHighbd, + ::testing::Values(av1_compute_stats_highbd_neon)); +#endif // HAVE_NEON + +// A test that reproduces b/274668506: signed integer overflow in +// update_a_sep_sym(). +TEST(SearchWienerTest, 10bitSignedIntegerOverflowInUpdateASepSym) { + constexpr int kWidth = 427; + constexpr int kHeight = 1; + std::vector<uint16_t> buffer(3 * kWidth * kHeight); + // The values in the buffer alternate between 0 and 1023. 
+ uint16_t value = 0; + for (size_t i = 0; i < buffer.size(); ++i) { + buffer[i] = value; + value = 1023 - value; + } + unsigned char *img_data = reinterpret_cast<unsigned char *>(buffer.data()); + + aom_image_t img; + EXPECT_EQ( + aom_img_wrap(&img, AOM_IMG_FMT_I44416, kWidth, kHeight, 1, img_data), + &img); + img.cp = AOM_CICP_CP_UNSPECIFIED; + img.tc = AOM_CICP_TC_UNSPECIFIED; + img.mc = AOM_CICP_MC_UNSPECIFIED; + img.range = AOM_CR_FULL_RANGE; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA), + AOM_CODEC_OK); + cfg.rc_end_usage = AOM_Q; + cfg.g_profile = 1; + cfg.g_bit_depth = AOM_BITS_10; + cfg.g_input_bit_depth = 10; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + cfg.g_limit = 1; + cfg.g_lag_in_frames = 0; + cfg.kf_mode = AOM_KF_DISABLED; + cfg.kf_max_dist = 0; + cfg.g_threads = 61; + cfg.rc_min_quantizer = 2; + cfg.rc_max_quantizer = 20; + aom_codec_ctx_t enc; + EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 11), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_ROW_MT, 1), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_ROWS, 4), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 3), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_SKIP_POSTPROC_FILTERING, 1), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM), + AOM_CODEC_OK); + + // Encode frame + EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK); + aom_codec_iter_t iter = nullptr; + const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0x1f0011. 
+ EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Flush encoder + EXPECT_EQ(aom_codec_encode(&enc, nullptr, 0, 1, 0), AOM_CODEC_OK); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} + +// A test that reproduces b/281219978: signed integer overflow in +// update_b_sep_sym(). +TEST(SearchWienerTest, 12bitSignedIntegerOverflowInUpdateBSepSym) { + constexpr int kWidth = 311; + constexpr int kHeight = 3; + static const uint16_t buffer[3 * kWidth * kHeight] = { + // Y plane: + 0, 0, 0, 2156, 2513, 2211, 4095, 4095, 0, 2538, 0, 0, 0, 0, 4095, 0, 258, + 941, 4095, 907, 0, 0, 2325, 2485, 2408, 4095, 1513, 0, 3644, 2080, 4095, + 4095, 0, 2135, 0, 2461, 4095, 0, 4095, 4095, 0, 1987, 0, 3629, 0, 4095, + 3918, 4095, 0, 4095, 4095, 4095, 0, 1065, 0, 2072, 3597, 102, 0, 534, 0, 0, + 0, 4095, 0, 0, 4095, 0, 4095, 0, 4095, 0, 3611, 0, 1139, 4095, 0, 0, 0, 0, + 0, 4095, 0, 0, 0, 0, 4095, 4095, 4095, 0, 0, 0, 3070, 3224, 0, 0, 4095, + 4051, 4095, 0, 4095, 3712, 0, 1465, 4095, 1699, 4095, 4095, 0, 0, 0, 3885, + 0, 4095, 0, 0, 4095, 1686, 4095, 4095, 4095, 4095, 1330, 0, 0, 0, 4095, 0, + 4095, 4095, 3919, 4095, 781, 2371, 2055, 4095, 912, 3710, 0, 2045, 0, 4095, + 4095, 4095, 1811, 0, 1298, 1115, 0, 3327, 0, 0, 4095, 0, 253, 2386, 4095, + 1791, 3657, 1444, 0, 4095, 1918, 4095, 4095, 0, 4095, 305, 1587, 0, 4095, 0, + 3759, 0, 0, 4095, 2387, 4095, 4095, 0, 0, 4095, 4095, 0, 1015, 4095, 0, 768, + 2598, 1667, 130, 4095, 0, 0, 435, 4095, 3683, 4095, 0, 4095, 4095, 1888, + 2828, 4095, 3349, 0, 4095, 4095, 4095, 4095, 0, 4095, 0, 0, 4095, 0, 2491, + 1598, 0, 0, 383, 3712, 4095, 0, 0, 4095, 760, 4095, 4095, 4095, 2030, 4095, + 0, 0, 3236, 0, 1040, 0, 0, 4095, 0, 0, 4095, 4095, 4095, 0, 0, 1043, 3897, + 2446, 233, 1589, 427, 4095, 4095, 4095, 4095, 0, 1656, 3786, 4095, 0, 840, + 4095, 4095, 1429, 
4095, 0, 4095, 2734, 4095, 0, 2431, 1801, 278, 0, 4095, 0, + 4095, 0, 0, 420, 0, 0, 746, 0, 0, 3281, 3006, 4095, 4095, 0, 0, 0, 3605, + 4095, 4095, 0, 4095, 4095, 4095, 4095, 2660, 496, 4095, 0, 0, 0, 0, 4095, 0, + 1317, 4095, 4095, 510, 1919, 0, 3893, 0, 4095, 4095, 4095, 4095, 4095, 2071, + 2006, 0, 3316, 4095, 0, 0, 4095, 852, 2982, 0, 2073, 0, 2728, 1499, 4095, + 852, 361, 3137, 4095, 4095, 1502, 1575, 0, 4095, 0, 0, 0, 0, 1585, 4095, 0, + 4095, 0, 3188, 3244, 4095, 2958, 4095, 4095, 0, 4095, 4095, 4095, 1706, + 2896, 4095, 1788, 730, 1146, 4095, 0, 0, 4095, 0, 0, 0, 2791, 3613, 2175, + 2925, 0, 0, 0, 0, 0, 1279, 4095, 4095, 0, 4095, 0, 0, 2336, 0, 3462, 4095, + 0, 4095, 1997, 2328, 2860, 0, 4095, 4095, 3241, 4095, 4095, 4095, 4095, + 4095, 4095, 118, 0, 4095, 4095, 4095, 0, 3734, 0, 0, 0, 4095, 1952, 4095, + 413, 4095, 1183, 4095, 0, 4095, 0, 0, 4095, 4095, 4095, 3805, 0, 1398, 0, + 4095, 0, 0, 0, 4095, 4095, 4095, 2802, 3658, 4095, 4095, 0, 0, 0, 4095, 0, + 897, 0, 4095, 2163, 0, 0, 0, 4095, 1440, 2487, 4095, 4095, 0, 4095, 4095, + 4095, 2808, 0, 1999, 0, 0, 4095, 4095, 4095, 1563, 124, 2179, 754, 0, 0, + 2407, 2798, 0, 4095, 4095, 0, 0, 1929, 0, 0, 0, 1387, 4095, 4095, 0, 0, + 3911, 562, 4095, 0, 4095, 2639, 2673, 4095, 4095, 0, 0, 4095, 4095, 0, 4095, + 4095, 901, 0, 321, 3961, 4095, 0, 4095, 4095, 4095, 0, 0, 0, 0, 3035, 3713, + 3441, 0, 4095, 0, 0, 854, 1544, 3963, 1968, 4095, 0, 0, 0, 0, 2897, 4095, 0, + 4095, 4095, 0, 235, 1011, 4095, 0, 3452, 4095, 4095, 0, 0, 4095, 4095, 4095, + 4095, 4095, 3312, 0, 3064, 4095, 3981, 4095, 4095, 4095, 4095, 4095, 0, 791, + 3243, 4095, 799, 0, 0, 0, 523, 2117, 3776, 0, 4095, 3311, 0, 543, 4095, + 4095, 4095, 0, 0, 4095, 4095, 4095, 4095, 0, 0, 4095, 4095, 225, 0, 1195, + 3070, 1210, 4095, 0, 4095, 498, 782, 0, 0, 4095, 4095, 4095, 4095, 4095, + 1456, 4095, 3898, 1472, 4095, 4095, 0, 4095, 4026, 0, 0, 2354, 1554, 0, + 4095, 0, 2986, 0, 1053, 1228, 0, 0, 4095, 4095, 0, 0, 4095, 0, 0, 4095, 0, + 0, 0, 606, 0, 4095, 3563, 
4095, 2016, 4095, 0, 0, 4095, 0, 4095, 4095, 4095, + 0, 0, 0, 929, 0, 0, 4095, 0, 3069, 4095, 0, 2687, 4095, 4095, 4095, 2015, + 4095, 4095, 4095, 0, 4095, 0, 0, 2860, 3668, 0, 0, 4095, 2523, 2104, 0, 0, + 3063, 4095, 3674, 4095, 0, 2762, 0, 4095, 2582, 3473, 930, 0, 1012, 108, 38, + 4095, 1148, 3568, 4036, 4095, 4095, 0, 1120, 1873, 3028, 4095, 515, 1902, + 4095, 0, 815, 4095, 1548, 0, 1073, 3919, 4095, 2374, 0, 3126, 4095, 2268, 0, + 0, 0, 4095, 425, 4095, 0, 0, 4095, 4095, 2710, 4095, 2067, 4095, 4095, 2201, + 4095, 4095, 0, 4095, 4095, 2933, 0, 417, 2801, 4095, 4095, 3274, 0, 2870, + 4095, 4095, 0, 0, 973, 0, 0, 3129, 4095, 0, 0, 0, 4095, 4095, 4095, 0, 242, + 4095, 0, 4095, 0, 0, 0, 0, 987, 0, 2426, 4045, 2780, 0, 4095, 3762, 3361, + 3095, 4095, 596, 1072, 4071, 4095, 4095, 0, 0, 81, 0, 1001, 1683, 4095, + 4095, 3105, 2673, 0, 3300, 104, 4030, 0, 2615, 4095, 4095, 0, 4095, 1830, + 3917, 4095, 4095, 4095, 0, 4095, 3637, 0, 4095, 4095, 3677, 4095, 4095, 0, + 880, 4095, 4095, 0, 2797, 0, 0, 0, 0, 3225, 4095, 4095, 1925, 2885, 1879, 0, + 0, 4095, 0, 0, 0, 2974, 559, 0, 0, 0, 699, 997, 1491, 423, 4012, 0, 2315, + 4095, 0, 0, 4095, 0, 836, 4095, 0, 4095, 0, 1752, 0, 0, 0, 4095, 4095, 0, 0, + 51, 4095, 350, 0, 2143, 2588, 0, 4095, 0, 4095, 0, 2757, 2370, 4095, 668, + 4095, 0, 4095, 0, 3652, 3890, 0, 4095, 0, 4095, 4095, 4095, 4095, 4095, + // U plane: + 4095, 4095, 1465, 0, 588, 4095, 0, 4095, 4095, 4095, 0, 2167, 4095, 4095, + 918, 3223, 4095, 4095, 0, 696, 4095, 4095, 0, 0, 594, 4095, 2935, 0, 0, 0, + 2036, 4095, 0, 2492, 4095, 4095, 0, 0, 0, 3883, 0, 4095, 483, 4095, 4095, + 324, 923, 0, 3079, 0, 4095, 4095, 810, 0, 3371, 4095, 4095, 0, 4095, 2756, + 0, 723, 0, 3338, 1084, 0, 4095, 4095, 3764, 0, 4095, 4095, 4095, 2323, 0, + 3693, 682, 0, 0, 909, 4095, 2348, 4095, 4095, 4095, 1509, 4095, 0, 4095, + 4095, 4095, 4095, 3977, 3652, 1580, 637, 4095, 0, 593, 4095, 1199, 1773, + 4095, 4095, 4095, 0, 3447, 0, 0, 4095, 3873, 0, 0, 2094, 0, 1195, 0, 3892, + 4095, 4095, 729, 
4095, 0, 0, 4095, 449, 4095, 4095, 2900, 0, 4095, 0, 2114, + 4095, 4095, 4095, 1174, 995, 2933, 360, 0, 1970, 0, 4095, 1208, 0, 4095, 0, + 4095, 0, 4095, 4095, 0, 4095, 0, 0, 0, 1976, 0, 0, 921, 4095, 4095, 192, + 1006, 0, 0, 2725, 4095, 0, 2813, 0, 0, 2375, 4095, 1982, 0, 2725, 4095, + 1225, 3566, 4095, 0, 344, 863, 2747, 0, 4095, 4095, 1928, 4095, 4095, 0, + 3640, 0, 1744, 3191, 4095, 4095, 0, 4095, 4095, 4095, 0, 0, 748, 4095, 0, + 2609, 0, 0, 0, 0, 0, 3508, 4095, 4095, 2463, 0, 4095, 0, 4095, 4095, 4095, + 3175, 419, 2193, 0, 0, 4095, 0, 0, 4095, 4051, 2159, 4095, 4095, 2262, 379, + 4095, 0, 0, 3399, 4095, 4095, 4095, 3769, 2510, 4054, 3336, 730, 3968, 0, 0, + 3354, 0, 1822, 0, 4095, 0, 3847, 3823, 3262, 0, 0, 2936, 0, 4095, 4095, + 2120, 0, 3147, 0, 2838, 3480, 474, 1194, 4095, 4095, 2820, 4095, 0, 4095, + 1882, 4095, 1085, 0, 4095, 2234, 3371, 4095, 0, 4095, 0, 0, 0, 2586, 4095, + 4095, 4095, 4095, 0, 3818, 1401, 2273, 4095, 0, 4095, 0, 3907, 4095, 4095, + 694, 0, 4066, 4095, 0, 0, 4095, 2116, 4095, 4095, 4095, 4095, 4095, 0, 2821, + 29, 0, 0, 663, 1711, 652, 1271, 4095, 4095, 2401, 3726, 4095, 3453, 1803, + 3614, 0, 4095, 3439, 4095, 0, 4095, 0, 816, 0, 0, 4095, 4095, 2635, 0, 1918, + 0, 2663, 381, 0, 0, 3670, 0, 4095, 3065, 965, 4095, 4095, 4095, 2993, 4095, + 4095, 0, 4095, 973, 4095, 0, 4095, 4095, 0, 3071, 0, 2777, 4095, 4095, 0, + 3996, 4095, 1637, 0, 4095, 67, 3784, 0, 0, 4095, 2603, 579, 4095, 4095, + 2854, 4095, 3016, 0, 4095, 0, 0, 4095, 4095, 4095, 4095, 3998, 3023, 4095, + 4095, 0, 0, 0, 4095, 4095, 4095, 4095, 0, 0, 2623, 1308, 55, 4095, 0, 0, + 2554, 2311, 0, 4095, 4095, 4095, 1134, 2112, 0, 4095, 4095, 0, 4095, 0, 645, + 0, 0, 4095, 0, 909, 0, 0, 1719, 4095, 0, 3542, 0, 575, 0, 4095, 4095, 4095, + 3428, 1172, 481, 1521, 4095, 3199, 1265, 4095, 3518, 4017, 4095, 760, 2042, + 3986, 0, 4095, 42, 4095, 0, 4095, 4095, 4095, 4095, 2235, 346, 3865, 0, + 4095, 4095, 4095, 4095, 4095, 4095, 845, 4095, 0, 2826, 4095, 4095, 0, 0, + 335, 1614, 1465, 0, 
4095, 4095, 0, 2771, 4095, 0, 2810, 4095, 4095, 0, 1254, + 4095, 2589, 4095, 4095, 2252, 0, 0, 0, 4095, 0, 73, 4095, 4095, 0, 1341, 0, + 0, 0, 0, 4095, 0, 0, 2645, 1985, 492, 914, 3996, 4095, 4095, 4095, 0, 2383, + 2556, 433, 0, 4095, 1094, 4095, 4095, 642, 4095, 1722, 0, 3460, 4095, 4095, + 4095, 4095, 4095, 0, 154, 4095, 92, 4095, 0, 0, 0, 4095, 0, 4095, 4095, 444, + 0, 2925, 0, 0, 0, 0, 1628, 0, 4095, 1731, 2418, 697, 4095, 0, 2513, 4095, 0, + 4095, 4095, 4095, 4095, 4095, 0, 2510, 4095, 3850, 0, 0, 4095, 2480, 4095, + 4095, 2661, 4095, 0, 4095, 0, 0, 4095, 4095, 847, 4095, 4095, 3257, 443, 0, + 67, 0, 0, 0, 4095, 0, 0, 3073, 4095, 0, 4095, 0, 4095, 0, 4095, 1224, 4095, + 4095, 4095, 0, 4095, 958, 0, 4095, 0, 2327, 684, 0, 0, 0, 0, 4095, 4095, 0, + 3693, 795, 4095, 0, 621, 1592, 2314, 4095, 0, 928, 1897, 4095, 4095, 0, + 4095, 0, 0, 4095, 2619, 4095, 0, 4095, 0, 0, 4095, 2485, 4095, 4095, 0, 435, + 4095, 1818, 4095, 4095, 0, 0, 0, 4095, 4095, 4095, 4095, 0, 1671, 4095, + 4095, 0, 2617, 0, 2572, 0, 0, 4095, 3471, 0, 0, 4095, 2719, 3979, 1307, 0, + 0, 0, 0, 1794, 642, 447, 913, 4095, 3927, 0, 2686, 0, 0, 4095, 0, 857, 0, + 4095, 4095, 567, 2385, 0, 0, 4095, 893, 0, 289, 0, 0, 0, 4095, 4095, 2566, + 0, 1913, 0, 2350, 1033, 2764, 0, 4095, 0, 4095, 0, 0, 0, 0, 4095, 3952, + 3969, 0, 3476, 0, 4095, 4095, 393, 0, 2613, 0, 0, 1422, 0, 3359, 491, 3263, + 4095, 4095, 0, 0, 4095, 697, 3601, 4095, 0, 4095, 4095, 0, 4095, 0, 0, 4095, + 0, 4095, 4095, 4095, 2506, 0, 0, 1403, 0, 3836, 3976, 0, 4095, 4095, 4095, + 2497, 4095, 4095, 4095, 4095, 0, 4095, 3317, 4095, 4095, 4095, 0, 0, 1131, + 0, 0, 0, 4095, 0, 0, 4095, 0, 0, 2988, 4095, 4095, 2711, 2487, 1335, 0, 0, + 0, 4095, 261, 4095, 86, 0, 0, 1138, 4095, 0, 0, 4095, 4095, 0, 0, 0, 334, 0, + 2395, 3297, 4095, 1698, 4095, 1791, 1341, 0, 3559, 0, 4095, 0, 2056, 3238, + 3310, 4095, 4095, 779, 2129, 2849, 4095, 2622, 1051, 0, 0, 1282, 4095, 1246, + 0, 0, 3696, 4095, 556, 0, 0, 3463, 2658, 3572, 4095, 3982, 4095, 4095, 0, 0, + 
4053, 4095, 4095, 4095, 2162, 2567, 1621, 4095, 4095, 1522, 293, 4095, 0, 0, + 1976, 4095, 3089, 4095, 0, 0, 0, 0, 3650, + // V plane: + 0, 1892, 4095, 1995, 0, 0, 0, 2208, 1152, 1794, 4095, 4095, 89, 3333, 4095, + 2478, 4095, 2505, 4095, 0, 2664, 4095, 1984, 0, 1144, 4095, 0, 4095, 0, + 4095, 0, 0, 0, 2404, 1727, 4095, 4095, 0, 1326, 2033, 0, 4095, 0, 4095, + 3022, 0, 4095, 0, 1980, 4095, 0, 2284, 4095, 0, 3422, 0, 4095, 2171, 3155, + 4095, 0, 4095, 0, 636, 0, 0, 4095, 3264, 3862, 0, 2164, 0, 0, 3879, 3886, 0, + 225, 0, 0, 4095, 0, 1956, 523, 464, 738, 0, 1545, 0, 2829, 4095, 4095, 4095, + 799, 4095, 358, 4095, 0, 0, 953, 0, 0, 2081, 4095, 1604, 4095, 2086, 0, 954, + 0, 0, 2393, 2413, 4095, 4095, 0, 3583, 4095, 4095, 2995, 4095, 0, 4095, + 4095, 3501, 4095, 247, 4095, 0, 0, 0, 4095, 1303, 3382, 1059, 4095, 0, 543, + 1276, 1801, 0, 0, 0, 2928, 0, 4095, 3931, 70, 0, 0, 3992, 4095, 1278, 1930, + 4095, 0, 4095, 4095, 3894, 0, 0, 0, 0, 4095, 0, 0, 0, 0, 0, 0, 4095, 4095, + 4095, 1098, 4095, 2059, 0, 380, 3166, 0, 4095, 2215, 0, 0, 2846, 0, 0, 2614, + 528, 4095, 0, 4095, 2371, 0, 4095, 0, 0, 0, 0, 4095, 3133, 4095, 4095, 0, + 4095, 1283, 3821, 1772, 0, 0, 4095, 4095, 4095, 890, 3475, 4095, 4095, 133, + 3292, 1819, 4095, 4095, 4095, 0, 0, 4095, 702, 4095, 0, 0, 0, 4095, 0, 2137, + 4095, 4095, 4095, 0, 0, 0, 4095, 4095, 1555, 2435, 2778, 4095, 0, 4095, + 3825, 0, 3736, 3054, 0, 0, 4095, 4095, 4095, 0, 0, 0, 0, 371, 4095, 4095, 0, + 0, 1565, 4095, 2731, 4095, 0, 756, 925, 0, 0, 0, 4095, 775, 1379, 4095, + 1439, 0, 0, 0, 2680, 0, 0, 4095, 1280, 4095, 0, 0, 4095, 4095, 0, 3088, 0, + 4095, 4095, 4095, 0, 0, 1526, 4095, 2314, 4095, 4095, 0, 4095, 288, 0, 205, + 4095, 4095, 4095, 0, 1247, 2014, 0, 1530, 1985, 0, 0, 4095, 3195, 0, 4095, + 4, 2397, 4095, 4095, 4095, 0, 4095, 4095, 4095, 0, 0, 0, 0, 0, 4031, 928, + 4095, 0, 0, 4095, 4095, 4095, 1966, 4095, 2299, 1215, 4095, 0, 4095, 1335, + 0, 4095, 1991, 4095, 0, 4095, 114, 0, 0, 0, 2123, 2639, 4095, 3323, 4095, + 4095, 418, 209, 
0, 0, 4095, 4095, 4095, 4095, 963, 0, 0, 0, 4095, 2505, 0, + 3627, 0, 311, 3748, 2047, 4095, 2791, 0, 3643, 1852, 0, 0, 4095, 0, 2179, 0, + 4095, 2678, 0, 0, 0, 2342, 4095, 4095, 0, 0, 4095, 0, 0, 0, 0, 1076, 0, 0, + 4095, 0, 2370, 0, 3530, 0, 0, 0, 0, 0, 4095, 0, 0, 0, 3474, 1201, 0, 379, + 699, 4095, 777, 4095, 0, 4095, 4095, 0, 1213, 1762, 4095, 4095, 4095, 0, + 4095, 1090, 1233, 0, 4095, 0, 4095, 0, 0, 0, 2845, 3385, 2718, 0, 0, 2975, + 3630, 0, 4095, 4095, 4095, 4095, 3261, 243, 0, 4095, 0, 0, 3836, 4095, 4095, + 4095, 963, 0, 0, 2526, 0, 4095, 4000, 4095, 2069, 0, 0, 4095, 0, 4095, 1421, + 0, 4095, 0, 4095, 4095, 0, 4095, 0, 4095, 4095, 1537, 4095, 3201, 0, 0, + 4095, 2719, 4095, 0, 4095, 4095, 4095, 0, 4095, 0, 4095, 2300, 0, 2876, 0, + 4095, 4095, 4095, 3235, 497, 635, 0, 1480, 4095, 0, 3067, 3979, 3741, 0, + 3059, 1214, 4095, 4095, 2197, 0, 4095, 4095, 2734, 0, 4095, 4095, 3364, + 2369, 4095, 303, 4095, 0, 4095, 4095, 3472, 1733, 4095, 4095, 4095, 0, 55, + 0, 10, 1378, 1169, 4095, 0, 0, 688, 3613, 0, 4095, 2832, 867, 4095, 4095, + 3514, 4095, 0, 4095, 4095, 2458, 3506, 0, 1920, 0, 1762, 1178, 2549, 4095, + 3967, 4095, 0, 2975, 1282, 0, 377, 846, 3434, 97, 0, 0, 1616, 3526, 136, + 1888, 0, 147, 334, 4095, 0, 4095, 0, 4095, 1106, 4095, 0, 4095, 3280, 4095, + 4095, 0, 2849, 3528, 0, 4095, 4095, 0, 2306, 0, 3412, 0, 4095, 4095, 4095, + 4048, 2273, 0, 4095, 4095, 4095, 0, 4095, 3031, 4095, 4095, 4095, 0, 3382, + 3812, 2315, 4095, 0, 0, 0, 432, 4095, 3606, 0, 4, 2847, 4095, 0, 4095, 0, 0, + 2616, 4095, 4095, 0, 4095, 0, 3394, 4095, 3976, 3119, 0, 0, 0, 0, 4046, + 4095, 4095, 3331, 4095, 2127, 0, 4095, 0, 0, 0, 4095, 4095, 4095, 0, 4095, + 4095, 4095, 0, 2068, 0, 0, 3882, 2967, 0, 1745, 4095, 2112, 478, 0, 4095, 0, + 199, 4095, 4095, 3542, 4095, 2634, 4095, 4095, 1235, 4095, 4095, 167, 1553, + 0, 4095, 2649, 0, 3383, 0, 4095, 2803, 4095, 0, 4095, 0, 785, 4095, 0, 4095, + 1743, 4095, 0, 3945, 0, 4095, 1894, 4095, 3973, 4095, 0, 0, 4095, 0, 0, + 4095, 318, 4095, 
4095, 4095, 0, 261, 4095, 4095, 2125, 2690, 4095, 0, 4095, + 3863, 1740, 4095, 0, 2899, 1509, 0, 0, 0, 2780, 4095, 1897, 2104, 4095, + 1708, 284, 4095, 0, 4095, 3382, 4095, 4095, 483, 0, 0, 0, 3099, 0, 4095, 0, + 926, 4095, 2062, 1931, 2121, 0, 4095, 0, 2485, 1535, 4095, 4095, 3662, 4095, + 2419, 2487, 0, 4095, 4095, 4095, 0, 0, 4095, 0, 0, 2029, 0, 3008, 2338, 0, + 4095, 0, 3854, 0, 4095, 0, 0, 1315, 0, 0, 0, 0, 3492, 0, 1445, 0, 11, 4095, + 0, 0, 873, 0, 4095, 0, 4095, 2654, 3040, 0, 0, 0, 4095, 0, 68, 4095, 0, 0, + 990, 0, 828, 1015, 88, 3606, 0, 2875, 4095, 0, 3117, 411, 0, 0, 2859, 0, 0, + 4095, 3480, 25, 4095, 4095, 4095, 0, 0, 0, 4095, 4095, 4095, 4095, 1724, 0, + 0, 0, 3635, 1063, 3728, 4095, 4095, 2025, 3715, 0, 0, 0, 3722, 0, 1648, 0, + 4095, 3579, 0, 0, 0, 4095, 4095, 0, 4095 + }; + unsigned char *img_data = + reinterpret_cast<unsigned char *>(const_cast<uint16_t *>(buffer)); + + aom_image_t img; + EXPECT_EQ( + aom_img_wrap(&img, AOM_IMG_FMT_I44416, kWidth, kHeight, 1, img_data), + &img); + img.cp = AOM_CICP_CP_UNSPECIFIED; + img.tc = AOM_CICP_TC_UNSPECIFIED; + img.mc = AOM_CICP_MC_UNSPECIFIED; + img.range = AOM_CR_FULL_RANGE; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA), + AOM_CODEC_OK); + cfg.rc_end_usage = AOM_Q; + cfg.g_profile = 2; + cfg.g_bit_depth = AOM_BITS_12; + cfg.g_input_bit_depth = 12; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + cfg.g_limit = 1; + cfg.g_lag_in_frames = 0; + cfg.kf_mode = AOM_KF_DISABLED; + cfg.kf_max_dist = 0; + cfg.g_threads = 34; + cfg.rc_min_quantizer = 8; + cfg.rc_max_quantizer = 20; + aom_codec_ctx_t enc; + EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 14), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_ROW_MT, 1), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_ROWS, 4), 
AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_COLUMNS, 4), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 0), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_SKIP_POSTPROC_FILTERING, 1), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM), + AOM_CODEC_OK); + + // Encode frame + EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK); + aom_codec_iter_t iter = nullptr; + const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0x1f0011. + EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Flush encoder + EXPECT_EQ(aom_codec_encode(&enc, nullptr, 0, 1, 0), AOM_CODEC_OK); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} + +// A test that reproduces b/272139363: signed integer overflow in +// update_b_sep_sym(). 
+TEST(SearchWienerTest, 10bitSignedIntegerOverflowInUpdateBSepSym) { + constexpr int kWidth = 34; + constexpr int kHeight = 3; + static const uint16_t buffer[3 * kWidth * kHeight] = { + // Y plane: + 61, 765, 674, 188, 367, 944, 153, 275, 906, 433, 154, 51, 8, 855, 186, 154, + 392, 0, 634, 3, 690, 1023, 1023, 1023, 1023, 1023, 1023, 8, 1, 64, 426, 0, + 100, 344, 944, 816, 816, 33, 1023, 1023, 1023, 1023, 295, 1023, 1023, 1023, + 1023, 1023, 1023, 1015, 1023, 231, 1020, 254, 439, 439, 894, 439, 150, 1019, + 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 385, 320, 575, + 682, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 511, 699, 987, 3, 140, + 661, 120, 33, 143, 0, 0, 0, 3, 40, 625, 585, 16, 579, 160, 867, + // U plane: + 739, 646, 13, 603, 7, 328, 91, 32, 488, 870, 330, 330, 330, 330, 330, 330, + 109, 330, 330, 330, 3, 545, 945, 249, 35, 561, 801, 32, 931, 639, 801, 91, + 1023, 827, 844, 948, 631, 894, 854, 601, 432, 504, 85, 1, 0, 0, 89, 89, 0, + 0, 0, 0, 0, 0, 432, 801, 382, 4, 0, 0, 2, 89, 89, 89, 89, 89, 89, 384, 0, 0, + 0, 0, 0, 0, 0, 1023, 1019, 1, 3, 691, 575, 691, 691, 691, 691, 691, 691, + 691, 691, 691, 691, 691, 84, 527, 4, 485, 8, 682, 698, 340, 1015, 706, + // V plane: + 49, 10, 28, 1023, 1023, 1023, 0, 32, 32, 872, 114, 1003, 1023, 57, 477, 999, + 1023, 309, 309, 309, 309, 309, 309, 309, 309, 309, 309, 309, 309, 309, 309, + 9, 418, 418, 418, 418, 418, 418, 0, 0, 0, 1023, 4, 5, 0, 0, 1023, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 64, 0, 155, 709, 3, 331, 807, 633, 1023, + 1018, 646, 886, 991, 692, 915, 294, 0, 35, 2, 0, 471, 643, 770, 346, 176, + 32, 329, 322, 302, 61, 765, 674, 188, 367, 944, 153, 275, 906, 433, 154 + }; + unsigned char *img_data = + reinterpret_cast<unsigned char *>(const_cast<uint16_t *>(buffer)); + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I44416, kWidth, kHeight, 1, + img_data)); + img.cp = AOM_CICP_CP_UNSPECIFIED; + img.tc = AOM_CICP_TC_UNSPECIFIED; + img.mc = 
AOM_CICP_MC_UNSPECIFIED; + img.range = AOM_CR_FULL_RANGE; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA)); + cfg.rc_end_usage = AOM_Q; + cfg.g_profile = 1; + cfg.g_bit_depth = AOM_BITS_10; + cfg.g_input_bit_depth = 10; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + cfg.g_limit = 1; + cfg.g_lag_in_frames = 0; + cfg.kf_mode = AOM_KF_DISABLED; + cfg.kf_max_dist = 0; + cfg.rc_min_quantizer = 3; + cfg.rc_max_quantizer = 54; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 28)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AV1E_SET_TILE_COLUMNS, 3)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 0)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_SKIP_POSTPROC_FILTERING, 1)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM)); + + // Encode frame + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + aom_codec_iter_t iter = nullptr; + const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0x1f0011. + EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Flush encoder + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + +// A test that reproduces b/277121724: signed integer overflow in +// update_b_sep_sym(). 
+TEST(SearchWienerTest, 8bitSignedIntegerOverflowInUpdateBSepSym) { + constexpr int kWidth = 198; + constexpr int kHeight = 3; + // 8-bit YUV 4:2:2 + static const unsigned char buffer[2 * kWidth * kHeight] = { + // Y plane: + 35, 225, 56, 91, 8, 142, 137, 143, 224, 49, 217, 57, 202, 163, 159, 246, + 232, 134, 135, 14, 76, 101, 239, 88, 186, 159, 118, 23, 114, 20, 108, 41, + 72, 17, 58, 242, 45, 146, 230, 14, 135, 140, 34, 61, 189, 181, 222, 71, 98, + 221, 5, 199, 244, 85, 229, 163, 105, 87, 144, 105, 64, 150, 36, 233, 235, 1, + 179, 190, 50, 222, 176, 109, 166, 18, 80, 129, 45, 9, 218, 144, 234, 10, + 148, 117, 37, 10, 232, 139, 206, 92, 208, 247, 128, 79, 202, 79, 212, 89, + 185, 152, 206, 182, 83, 105, 21, 86, 150, 84, 21, 165, 34, 251, 174, 240, + 172, 155, 254, 85, 98, 25, 96, 78, 230, 253, 36, 19, 247, 155, 112, 216, + 166, 114, 229, 118, 197, 149, 186, 194, 128, 45, 219, 26, 36, 77, 110, 45, + 252, 238, 183, 161, 171, 96, 232, 108, 73, 61, 243, 58, 155, 38, 91, 209, + 187, 206, 16, 165, 236, 145, 69, 126, 102, 10, 4, 43, 191, 106, 193, 240, + 132, 226, 38, 78, 7, 152, 101, 255, 254, 39, 33, 86, 35, 247, 199, 179, 239, + 198, 165, 58, 190, 171, 226, 94, 158, 21, 190, 151, 75, 176, 11, 53, 199, + 87, 91, 1, 226, 20, 117, 96, 75, 192, 101, 200, 125, 106, 233, 176, 63, 204, + 114, 16, 31, 222, 15, 14, 71, 2, 25, 47, 100, 174, 26, 209, 138, 138, 211, + 147, 164, 204, 9, 104, 135, 250, 9, 201, 88, 218, 71, 251, 61, 199, 0, 34, + 59, 115, 228, 161, 100, 132, 50, 4, 117, 100, 191, 126, 53, 28, 193, 42, + 155, 206, 79, 80, 117, 11, 3, 253, 181, 181, 138, 239, 107, 142, 216, 57, + 202, 126, 229, 250, 60, 62, 150, 128, 95, 32, 251, 207, 236, 208, 247, 183, + 59, 19, 117, 40, 106, 87, 140, 57, 109, 190, 51, 105, 226, 116, 156, 3, 35, + 86, 255, 138, 52, 211, 245, 76, 83, 109, 113, 77, 106, 77, 18, 56, 235, 158, + 24, 53, 151, 104, 152, 21, 15, 46, 163, 144, 217, 168, 154, 44, 80, 25, 11, + 37, 100, 235, 145, 154, 113, 0, 140, 153, 80, 64, 19, 121, 185, 144, 43, + 206, 
16, 16, 72, 189, 175, 231, 177, 40, 177, 206, 116, 4, 82, 43, 244, 237, + 22, 252, 71, 194, 106, 4, 112, 0, 108, 137, 126, 80, 122, 142, 43, 205, 22, + 209, 217, 165, 32, 208, 100, 70, 3, 120, 159, 203, 7, 233, 152, 37, 96, 212, + 177, 1, 133, 218, 161, 172, 202, 192, 186, 114, 150, 121, 177, 227, 175, 64, + 127, 153, 113, 91, 198, 0, 111, 227, 226, 218, 71, 62, 5, 43, 128, 27, 3, + 82, 5, 10, 68, 153, 215, 181, 138, 246, 224, 170, 1, 241, 191, 181, 151, + 167, 14, 80, 45, 4, 252, 29, 66, 125, 58, 225, 253, 255, 248, 224, 40, 24, + 236, 46, 11, 219, 154, 134, 12, 76, 72, 97, 239, 50, 39, 85, 182, 55, 219, + 19, 109, 81, 119, 125, 206, 159, 239, 67, 193, 180, 132, 80, 127, 2, 169, + 99, 53, 47, 5, 100, 174, 151, 124, 246, 202, 93, 82, 65, 53, 214, 238, 32, + 218, 15, 254, 153, 95, 79, 189, 67, 233, 47, 83, 48, 125, 144, 206, 82, 69, + 186, 112, 134, 244, 96, 21, 143, 187, 248, 8, 224, 161, 227, 185, 236, 6, + 175, 237, 169, 154, 89, 143, 106, 205, 26, 47, 155, 42, 28, 162, 7, 8, 45, + // U plane: + 55, 165, 203, 139, 152, 208, 36, 177, 61, 49, 129, 211, 140, 71, 253, 250, + 120, 167, 238, 67, 255, 223, 104, 32, 240, 179, 28, 41, 86, 84, 61, 243, + 169, 212, 201, 0, 9, 236, 89, 194, 204, 75, 228, 250, 27, 81, 137, 29, 255, + 131, 194, 241, 76, 133, 186, 135, 212, 197, 150, 145, 203, 96, 86, 231, 91, + 119, 197, 67, 226, 2, 118, 66, 181, 86, 219, 86, 132, 137, 156, 161, 221, + 18, 55, 170, 35, 206, 201, 193, 38, 63, 229, 29, 110, 96, 14, 135, 229, 99, + 106, 108, 167, 110, 50, 32, 144, 113, 48, 29, 57, 29, 20, 199, 145, 245, 9, + 183, 88, 174, 114, 237, 29, 40, 99, 117, 233, 6, 51, 227, 2, 28, 76, 149, + 190, 23, 240, 73, 113, 10, 73, 240, 105, 220, 129, 26, 144, 214, 34, 4, 24, + 219, 24, 156, 198, 214, 244, 143, 106, 255, 204, 93, 2, 88, 107, 211, 241, + 242, 86, 189, 219, 164, 132, 149, 32, 228, 219, 60, 202, 218, 189, 34, 250, + 160, 158, 36, 212, 212, 41, 233, 61, 92, 121, 170, 220, 192, 232, 255, 124, + 249, 231, 55, 196, 219, 196, 62, 238, 187, 76, 33, 138, 67, 
82, 159, 169, + 196, 66, 196, 110, 194, 64, 35, 205, 64, 218, 12, 41, 188, 195, 244, 178, + 17, 80, 8, 149, 39, 110, 146, 164, 162, 215, 227, 107, 103, 47, 52, 95, 3, + 181, 90, 255, 80, 83, 206, 66, 153, 112, 72, 109, 235, 69, 105, 57, 75, 145, + 186, 16, 87, 73, 61, 98, 197, 237, 17, 32, 207, 220, 246, 188, 46, 73, 121, + 84, 252, 164, 111, 21, 98, 13, 170, 174, 170, 231, 77, 10, 113, 9, 217, 11, + // V plane: + 124, 94, 69, 212, 107, 223, 228, 96, 56, 2, 158, 49, 251, 217, 143, 107, + 113, 17, 84, 169, 208, 43, 28, 37, 176, 54, 235, 150, 135, 135, 221, 94, 50, + 131, 251, 78, 38, 254, 129, 200, 207, 55, 111, 110, 144, 109, 228, 65, 70, + 39, 170, 5, 208, 151, 87, 86, 255, 74, 155, 153, 250, 15, 35, 33, 201, 226, + 117, 119, 220, 238, 133, 229, 69, 122, 160, 114, 245, 182, 13, 65, 2, 228, + 205, 174, 128, 248, 4, 139, 178, 227, 204, 243, 249, 253, 119, 253, 107, + 234, 39, 15, 173, 47, 93, 12, 222, 238, 30, 121, 124, 167, 27, 40, 215, 84, + 172, 130, 66, 43, 165, 55, 225, 79, 84, 153, 59, 110, 64, 176, 54, 123, 82, + 128, 189, 150, 52, 202, 102, 133, 199, 197, 253, 180, 221, 127, 144, 124, + 255, 224, 52, 149, 88, 166, 39, 38, 78, 114, 44, 242, 233, 40, 132, 142, + 152, 213, 112, 244, 221, 7, 52, 206, 246, 51, 182, 160, 247, 154, 183, 209, + 81, 70, 56, 186, 63, 182, 2, 82, 202, 178, 233, 52, 198, 241, 175, 38, 165, + 9, 231, 150, 114, 43, 159, 200, 42, 173, 217, 25, 233, 214, 210, 50, 43, + 159, 231, 102, 241, 246, 77, 76, 115, 77, 81, 114, 194, 182, 236, 0, 236, + 198, 197, 180, 176, 148, 48, 177, 106, 180, 150, 158, 237, 130, 242, 109, + 174, 247, 57, 230, 184, 64, 245, 251, 123, 169, 122, 156, 125, 123, 104, + 238, 1, 235, 187, 53, 67, 38, 50, 139, 123, 149, 111, 72, 80, 17, 175, 186, + 98, 153, 247, 97, 218, 141, 38, 0, 171, 254, 180, 81, 233, 71, 156, 48, 14, + 62, 210, 161, 124, 203, 92 + }; + unsigned char *img_data = const_cast<unsigned char *>(buffer); + + aom_image_t img; + EXPECT_EQ(aom_img_wrap(&img, AOM_IMG_FMT_I422, kWidth, kHeight, 1, img_data), + 
&img); + img.cp = AOM_CICP_CP_UNSPECIFIED; + img.tc = AOM_CICP_TC_UNSPECIFIED; + img.mc = AOM_CICP_MC_UNSPECIFIED; + img.range = AOM_CR_FULL_RANGE; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA), + AOM_CODEC_OK); + cfg.rc_end_usage = AOM_Q; + cfg.g_profile = 2; + cfg.g_bit_depth = AOM_BITS_8; + cfg.g_input_bit_depth = 8; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + cfg.g_limit = 1; + cfg.g_lag_in_frames = 0; + cfg.kf_mode = AOM_KF_DISABLED; + cfg.kf_max_dist = 0; + cfg.g_threads = 43; + cfg.rc_min_quantizer = 30; + cfg.rc_max_quantizer = 50; + aom_codec_ctx_t enc; + EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, 0), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 40), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_ROW_MT, 1), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_ROWS, 4), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_COLUMNS, 1), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 2), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_SKIP_POSTPROC_FILTERING, 1), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM), + AOM_CODEC_OK); + + // Encode frame + EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK); + aom_codec_iter_t iter = nullptr; + const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0x1f0011. 
+ EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Flush encoder + EXPECT_EQ(aom_codec_encode(&enc, nullptr, 0, 1, 0), AOM_CODEC_OK); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} + +// A test that reproduces b/259173819: signed integer overflow in +// linsolve_wiener(). +TEST(SearchWienerTest, 10bitSignedIntegerOverflowInLinsolveWiener) { + constexpr int kWidth = 3; + constexpr int kHeight = 3; + static const uint16_t buffer[3 * kWidth * kHeight] = { + // Y plane: + 81, 81, 1023, 1020, 81, 1023, 81, 128, 0, + // U plane: + 273, 273, 273, 273, 273, 273, 273, 273, 273, + // V plane: + 273, 273, 273, 273, 273, 273, 516, 81, 81 + }; + unsigned char *img_data = + reinterpret_cast<unsigned char *>(const_cast<uint16_t *>(buffer)); + + aom_image_t img; + EXPECT_EQ( + aom_img_wrap(&img, AOM_IMG_FMT_I44416, kWidth, kHeight, 1, img_data), + &img); + img.cp = AOM_CICP_CP_UNSPECIFIED; + img.tc = AOM_CICP_TC_UNSPECIFIED; + img.mc = AOM_CICP_MC_UNSPECIFIED; + img.range = AOM_CR_FULL_RANGE; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_ALL_INTRA), + AOM_CODEC_OK); + cfg.rc_end_usage = AOM_Q; + cfg.g_profile = 1; + cfg.g_bit_depth = AOM_BITS_10; + cfg.g_input_bit_depth = 10; + cfg.g_w = kWidth; + cfg.g_h = kHeight; + cfg.g_limit = 1; + cfg.g_lag_in_frames = 0; + cfg.kf_mode = AOM_KF_DISABLED; + cfg.kf_max_dist = 0; + cfg.g_threads = 21; + cfg.rc_min_quantizer = 16; + cfg.rc_max_quantizer = 54; + aom_codec_ctx_t enc; + EXPECT_EQ(aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 35), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_ROW_MT, 1), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, 
AV1E_SET_TILE_ROWS, 2), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_TILE_COLUMNS, 5), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_CPUUSED, 1), AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AV1E_SET_SKIP_POSTPROC_FILTERING, 1), + AOM_CODEC_OK); + EXPECT_EQ(aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM), + AOM_CODEC_OK); + + // Encode frame + EXPECT_EQ(aom_codec_encode(&enc, &img, 0, 1, 0), AOM_CODEC_OK); + aom_codec_iter_t iter = nullptr; + const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter); + ASSERT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0x1f0011. + EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Flush encoder + EXPECT_EQ(aom_codec_encode(&enc, nullptr, 0, 1, 0), AOM_CODEC_OK); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK); +} + +} // namespace wiener_highbd +#endif // CONFIG_AV1_HIGHBITDEPTH diff --git a/third_party/aom/test/y4m_test.cc b/third_party/aom/test/y4m_test.cc new file mode 100644 index 0000000000..a4ed13f7c5 --- /dev/null +++ b/third_party/aom/test/y4m_test.cc @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <string> + +#include "config/aom_config.h" + +#include "common/y4menc.h" +#include "test/md5_helper.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" + +namespace { + +using std::string; + +static const unsigned int kWidth = 160; +static const unsigned int kHeight = 90; +static const unsigned int kFrames = 10; + +struct Y4mTestParam { + const char *filename; + unsigned int bit_depth; + aom_img_fmt format; + const char *md5raw; +}; + +const Y4mTestParam kY4mTestVectors[] = { + { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, + "e5406275b9fc6bb3436c31d4a05c1cab" }, + { "park_joy_90p_8_420_monochrome.y4m", 8, AOM_IMG_FMT_I420, + "95ef5bf6218580588be24a5271bb6a7f" }, + { "park_joy_90p_8_420_vertical_csp.y4m", 8, AOM_IMG_FMT_I420, + "e5406275b9fc6bb3436c31d4a05c1cab" }, + { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, + "284a47a47133b12884ec3a14e959a0b6" }, + { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444, + "90517ff33843d85de712fd4fe60dbed0" }, + { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, + "63f21f9f717d8b8631bd2288ee87137b" }, + { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, + "48ab51fb540aed07f7ff5af130c9b605" }, + { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, + "067bfd75aa85ff9bae91fa3e0edd1e3e" }, + { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016, + "9e6d8f6508c6e55625f6b697bc461cef" }, + { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, + "b239c6b301c0b835485be349ca83a7e3" }, + { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, + "5a6481a550821dab6d0192f5c63845e9" }, +}; + +static const int PLANES_YUV[] = { AOM_PLANE_Y, AOM_PLANE_U, AOM_PLANE_V }; + +class Y4mVideoSourceTest : public ::testing::TestWithParam<Y4mTestParam>, + public ::libaom_test::Y4mVideoSource { + protected: + Y4mVideoSourceTest() : Y4mVideoSource("", 0, 0) {} + + ~Y4mVideoSourceTest() override { CloseSource(); } + + virtual void Init(const std::string 
&file_name, int limit) { + file_name_ = file_name; + start_ = 0; + limit_ = limit; + frame_ = 0; + Begin(); + } + + // Checks y4m header information + void HeaderChecks(unsigned int bit_depth, aom_img_fmt_t fmt) { + ASSERT_NE(input_file_, nullptr); + ASSERT_EQ(y4m_.pic_w, (int)kWidth); + ASSERT_EQ(y4m_.pic_h, (int)kHeight); + ASSERT_EQ(img()->d_w, kWidth); + ASSERT_EQ(img()->d_h, kHeight); + ASSERT_EQ(y4m_.bit_depth, bit_depth); + ASSERT_EQ(y4m_.aom_fmt, fmt); + if (fmt == AOM_IMG_FMT_I420 || fmt == AOM_IMG_FMT_I42016) { + ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3 / 2); + ASSERT_EQ(img()->x_chroma_shift, 1U); + ASSERT_EQ(img()->y_chroma_shift, 1U); + } + if (fmt == AOM_IMG_FMT_I422 || fmt == AOM_IMG_FMT_I42216) { + ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 2); + ASSERT_EQ(img()->x_chroma_shift, 1U); + ASSERT_EQ(img()->y_chroma_shift, 0U); + } + if (fmt == AOM_IMG_FMT_I444 || fmt == AOM_IMG_FMT_I44416) { + ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3); + ASSERT_EQ(img()->x_chroma_shift, 0U); + ASSERT_EQ(img()->y_chroma_shift, 0U); + } + } + + // Checks MD5 of the raw frame data + void Md5Check(const string &expected_md5) { + ASSERT_NE(input_file_, nullptr); + libaom_test::MD5 md5; + for (unsigned int i = start_; i < limit_; i++) { + md5.Add(img()); + Next(); + } + ASSERT_EQ(string(md5.Get()), expected_md5); + } +}; + +TEST_P(Y4mVideoSourceTest, SourceTest) { + const Y4mTestParam t = GetParam(); + Init(t.filename, kFrames); + HeaderChecks(t.bit_depth, t.format); + Md5Check(t.md5raw); +} + +INSTANTIATE_TEST_SUITE_P(C, Y4mVideoSourceTest, + ::testing::ValuesIn(kY4mTestVectors)); + +class Y4mVideoWriteTest : public Y4mVideoSourceTest { + protected: + Y4mVideoWriteTest() : tmpfile_(nullptr) {} + + ~Y4mVideoWriteTest() override { + delete tmpfile_; + input_file_ = nullptr; + } + + void ReplaceInputFile(FILE *input_file) { + CloseSource(); + frame_ = 0; + input_file_ = input_file; + rewind(input_file_); + ReadSourceToStart(); + } + + // Writes out a y4m file and then 
reads it back + void WriteY4mAndReadBack() { + ASSERT_NE(input_file_, nullptr); + char buf[Y4M_BUFFER_SIZE] = { 0 }; + const struct AvxRational framerate = { y4m_.fps_n, y4m_.fps_d }; + tmpfile_ = new libaom_test::TempOutFile; + ASSERT_NE(tmpfile_, nullptr); + ASSERT_NE(tmpfile_->file(), nullptr); + y4m_write_file_header(buf, sizeof(buf), kWidth, kHeight, &framerate, + img()->monochrome, img()->csp, y4m_.aom_fmt, + y4m_.bit_depth, AOM_CR_STUDIO_RANGE); + fputs(buf, tmpfile_->file()); + for (unsigned int i = start_; i < limit_; i++) { + y4m_write_frame_header(buf, sizeof(buf)); + fputs(buf, tmpfile_->file()); + y4m_write_image_file(img(), PLANES_YUV, tmpfile_->file()); + Next(); + } + ReplaceInputFile(tmpfile_->file()); + } + + void Init(const std::string &file_name, int limit) override { + Y4mVideoSourceTest::Init(file_name, limit); + WriteY4mAndReadBack(); + } + libaom_test::TempOutFile *tmpfile_; +}; + +TEST_P(Y4mVideoWriteTest, WriteTest) { + const Y4mTestParam t = GetParam(); + Init(t.filename, kFrames); + HeaderChecks(t.bit_depth, t.format); + Md5Check(t.md5raw); +} + +INSTANTIATE_TEST_SUITE_P(C, Y4mVideoWriteTest, + ::testing::ValuesIn(kY4mTestVectors)); + +static const char kY4MRegularHeader[] = + "YUV4MPEG2 W4 H4 F30:1 Ip A0:0 C420jpeg XYSCSS=420JPEG\n" + "FRAME\n" + "012345678912345601230123"; + +TEST(Y4MHeaderTest, RegularHeader) { + libaom_test::TempOutFile f; + ASSERT_NE(f.file(), nullptr); + fwrite(kY4MRegularHeader, 1, sizeof(kY4MRegularHeader), f.file()); + fflush(f.file()); + EXPECT_EQ(0, fseek(f.file(), 0, 0)); + + y4m_input y4m; + EXPECT_EQ(y4m_input_open(&y4m, f.file(), nullptr, 0, AOM_CSP_UNKNOWN, + /*only_420=*/0), + 0); + EXPECT_EQ(y4m.pic_w, 4); + EXPECT_EQ(y4m.pic_h, 4); + EXPECT_EQ(y4m.fps_n, 30); + EXPECT_EQ(y4m.fps_d, 1); + EXPECT_EQ(y4m.interlace, 'p'); + EXPECT_EQ(y4m.color_range, AOM_CR_STUDIO_RANGE); + EXPECT_EQ(strcmp("420jpeg", y4m.chroma_type), 0); + y4m_input_close(&y4m); +} + +// Testing that headers over 100 characters can be 
parsed. +static const char kY4MLongHeader[] = + "YUV4MPEG2 W4 H4 F30:1 Ip A0:0 C420jpeg XYSCSS=420JPEG " + "XCOLORRANGE=LIMITED XSOME_UNKNOWN_METADATA XOTHER_UNKNOWN_METADATA\n" + "FRAME\n" + "012345678912345601230123"; + +TEST(Y4MHeaderTest, LongHeader) { + libaom_test::TempOutFile tmpfile; + FILE *f = tmpfile.file(); + ASSERT_NE(f, nullptr); + fwrite(kY4MLongHeader, 1, sizeof(kY4MLongHeader), f); + fflush(f); + EXPECT_EQ(fseek(f, 0, 0), 0); + + y4m_input y4m; + EXPECT_EQ(y4m_input_open(&y4m, f, nullptr, 0, AOM_CSP_UNKNOWN, + /*only_420=*/0), + 0); + EXPECT_EQ(y4m.pic_w, 4); + EXPECT_EQ(y4m.pic_h, 4); + EXPECT_EQ(y4m.fps_n, 30); + EXPECT_EQ(y4m.fps_d, 1); + EXPECT_EQ(y4m.interlace, 'p'); + EXPECT_EQ(y4m.color_range, AOM_CR_STUDIO_RANGE); + EXPECT_EQ(strcmp("420jpeg", y4m.chroma_type), 0); + y4m_input_close(&y4m); +} + +static const char kY4MFullRangeHeader[] = + "YUV4MPEG2 W4 H4 F30:1 Ip A0:0 C420jpeg XYSCSS=420JPEG XCOLORRANGE=FULL\n" + "FRAME\n" + "012345678912345601230123"; + +TEST(Y4MHeaderTest, FullRangeHeader) { + libaom_test::TempOutFile tmpfile; + FILE *f = tmpfile.file(); + ASSERT_NE(f, nullptr); + fwrite(kY4MFullRangeHeader, 1, sizeof(kY4MFullRangeHeader), f); + fflush(f); + EXPECT_EQ(fseek(f, 0, 0), 0); + + y4m_input y4m; + EXPECT_EQ(y4m_input_open(&y4m, f, nullptr, 0, AOM_CSP_UNKNOWN, + /*only_420=*/0), + 0); + EXPECT_EQ(y4m.pic_w, 4); + EXPECT_EQ(y4m.pic_h, 4); + EXPECT_EQ(y4m.fps_n, 30); + EXPECT_EQ(y4m.fps_d, 1); + EXPECT_EQ(y4m.interlace, 'p'); + EXPECT_EQ(strcmp("420jpeg", y4m.chroma_type), 0); + EXPECT_EQ(y4m.color_range, AOM_CR_FULL_RANGE); + y4m_input_close(&y4m); +} + +TEST(Y4MHeaderTest, WriteStudioColorRange) { + char buf[128]; + struct AvxRational framerate = { /*numerator=*/30, /*denominator=*/1 }; + EXPECT_GE(y4m_write_file_header( + buf, /*len=*/128, /*width=*/4, /*height=*/5, &framerate, + /*monochrome=*/0, AOM_CSP_UNKNOWN, AOM_IMG_FMT_I420, + /*bit_depth=*/8, AOM_CR_STUDIO_RANGE), + 0); + EXPECT_EQ(strcmp("YUV4MPEG2 W4 H5 F30:1 Ip 
C420jpeg\n", buf), 0); +} + +TEST(Y4MHeaderTest, WriteFullColorRange) { + char buf[128]; + struct AvxRational framerate = { /*numerator=*/30, /*denominator=*/1 }; + EXPECT_GE(y4m_write_file_header( + buf, /*len=*/128, /*width=*/4, /*height=*/5, &framerate, + /*monochrome=*/0, AOM_CSP_UNKNOWN, AOM_IMG_FMT_I420, + /*bit_depth=*/8, AOM_CR_FULL_RANGE), + 0); + EXPECT_EQ(strcmp("YUV4MPEG2 W4 H5 F30:1 Ip C420jpeg XCOLORRANGE=FULL\n", buf), + 0); +} + +} // namespace diff --git a/third_party/aom/test/y4m_video_source.h b/third_party/aom/test/y4m_video_source.h new file mode 100644 index 0000000000..1369e4e280 --- /dev/null +++ b/third_party/aom/test/y4m_video_source.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#ifndef AOM_TEST_Y4M_VIDEO_SOURCE_H_ +#define AOM_TEST_Y4M_VIDEO_SOURCE_H_ +#include <algorithm> +#include <memory> +#include <string> + +#include "common/y4minput.h" +#include "test/video_source.h" + +namespace libaom_test { + +// This class extends VideoSource to allow parsing of raw yv12 +// so that we can do actual file encodes. 
+class Y4mVideoSource : public VideoSource { + public: + Y4mVideoSource(const std::string &file_name, unsigned int start, int limit) + : file_name_(file_name), input_file_(nullptr), img_(new aom_image_t()), + start_(start), limit_(limit), frame_(0), framerate_numerator_(0), + framerate_denominator_(0), y4m_() {} + + ~Y4mVideoSource() override { + aom_img_free(img_.get()); + CloseSource(); + } + + virtual void OpenSource() { + CloseSource(); + input_file_ = OpenTestDataFile(file_name_); + ASSERT_NE(input_file_, nullptr) + << "Input file open failed. Filename: " << file_name_; + } + + virtual void ReadSourceToStart() { + ASSERT_NE(input_file_, nullptr); + ASSERT_FALSE( + y4m_input_open(&y4m_, input_file_, nullptr, 0, AOM_CSP_UNKNOWN, 0)); + framerate_numerator_ = y4m_.fps_n; + framerate_denominator_ = y4m_.fps_d; + frame_ = 0; + for (unsigned int i = 0; i < start_; i++) { + Next(); + } + FillFrame(); + } + + void Begin() override { + OpenSource(); + ReadSourceToStart(); + } + + void Next() override { + ++frame_; + FillFrame(); + } + + aom_image_t *img() const override { + return (frame_ < limit_) ? img_.get() : nullptr; + } + + // Models a stream where Timebase = 1/FPS, so pts == frame. + aom_codec_pts_t pts() const override { return frame_; } + + unsigned long duration() const override { return 1; } + + aom_rational_t timebase() const override { + const aom_rational_t t = { framerate_denominator_, framerate_numerator_ }; + return t; + } + + unsigned int frame() const override { return frame_; } + + unsigned int limit() const override { return limit_; } + + virtual void FillFrame() { + ASSERT_NE(input_file_, nullptr); + // Read a frame from input_file. + y4m_input_fetch_frame(&y4m_, input_file_, img_.get()); + } + + // Swap buffers with another y4m source. This allows reading a new frame + // while keeping the old frame around. 
A whole Y4mSource is required and + // not just a aom_image_t because of how the y4m reader manipulates + // aom_image_t internals, + void SwapBuffers(Y4mVideoSource *other) { + std::swap(other->y4m_.dst_buf, y4m_.dst_buf); + aom_image_t *tmp; + tmp = other->img_.release(); + other->img_.reset(img_.release()); + img_.reset(tmp); + } + + protected: + void CloseSource() { + y4m_input_close(&y4m_); + y4m_ = y4m_input(); + if (input_file_ != nullptr) { + fclose(input_file_); + input_file_ = nullptr; + } + } + + std::string file_name_; + FILE *input_file_; + std::unique_ptr<aom_image_t> img_; + unsigned int start_; + unsigned int limit_; + unsigned int frame_; + int framerate_numerator_; + int framerate_denominator_; + y4m_input y4m_; +}; + +} // namespace libaom_test + +#endif // AOM_TEST_Y4M_VIDEO_SOURCE_H_ diff --git a/third_party/aom/test/yuv_video_source.h b/third_party/aom/test/yuv_video_source.h new file mode 100644 index 0000000000..77d5dfa73c --- /dev/null +++ b/third_party/aom/test/yuv_video_source.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#ifndef AOM_TEST_YUV_VIDEO_SOURCE_H_ +#define AOM_TEST_YUV_VIDEO_SOURCE_H_ + +#include <cstdio> +#include <cstdlib> +#include <string> + +#include "test/video_source.h" +#include "aom/aom_image.h" + +namespace libaom_test { + +// This class extends VideoSource to allow parsing of raw YUV +// formats of various color sampling and bit-depths so that we can +// do actual file encodes. 
+class YUVVideoSource : public VideoSource { + public: + YUVVideoSource(const std::string &file_name, aom_img_fmt format, + unsigned int width, unsigned int height, int rate_numerator, + int rate_denominator, unsigned int start, int limit) + : file_name_(file_name), input_file_(nullptr), img_(nullptr), + start_(start), limit_(limit), frame_(0), width_(0), height_(0), + format_(AOM_IMG_FMT_NONE), framerate_numerator_(rate_numerator), + framerate_denominator_(rate_denominator) { + // This initializes format_, raw_size_, width_, height_ and allocates img. + SetSize(width, height, format); + } + + ~YUVVideoSource() override { + aom_img_free(img_); + if (input_file_) fclose(input_file_); + } + + void Begin() override { + if (input_file_) fclose(input_file_); + input_file_ = OpenTestDataFile(file_name_); + ASSERT_NE(input_file_, nullptr) + << "Input file open failed. Filename: " << file_name_; + if (start_) + fseek(input_file_, static_cast<unsigned>(raw_size_) * start_, SEEK_SET); + + frame_ = start_; + FillFrame(); + } + + void Next() override { + ++frame_; + FillFrame(); + } + + aom_image_t *img() const override { + return (frame_ < limit_) ? img_ : nullptr; + } + + // Models a stream where Timebase = 1/FPS, so pts == frame. 
+ aom_codec_pts_t pts() const override { return frame_; } + + unsigned long duration() const override { return 1; } + + aom_rational_t timebase() const override { + const aom_rational_t t = { framerate_denominator_, framerate_numerator_ }; + return t; + } + + unsigned int frame() const override { return frame_; } + + unsigned int limit() const override { return limit_; } + + virtual void SetSize(unsigned int width, unsigned int height, + aom_img_fmt format) { + if (width != width_ || height != height_ || format != format_) { + aom_img_free(img_); + img_ = aom_img_alloc(nullptr, format, width, height, 1); + ASSERT_NE(img_, nullptr); + width_ = width; + height_ = height; + format_ = format; + switch (format) { + case AOM_IMG_FMT_NV12: + case AOM_IMG_FMT_I420: raw_size_ = width * height * 3 / 2; break; + case AOM_IMG_FMT_I422: raw_size_ = width * height * 2; break; + case AOM_IMG_FMT_I444: raw_size_ = width * height * 3; break; + case AOM_IMG_FMT_I42016: raw_size_ = width * height * 3; break; + case AOM_IMG_FMT_I42216: raw_size_ = width * height * 4; break; + case AOM_IMG_FMT_I44416: raw_size_ = width * height * 6; break; + default: ASSERT_TRUE(0); + } + } + } + + virtual void FillFrame() { + ASSERT_NE(input_file_, nullptr); + // Read a frame from input_file. + if (fread(img_->img_data, raw_size_, 1, input_file_) == 0) { + limit_ = frame_; + } + } + + protected: + std::string file_name_; + FILE *input_file_; + aom_image_t *img_; + size_t raw_size_; + unsigned int start_; + unsigned int limit_; + unsigned int frame_; + unsigned int width_; + unsigned int height_; + aom_img_fmt format_; + int framerate_numerator_; + int framerate_denominator_; +}; + +} // namespace libaom_test + +#endif // AOM_TEST_YUV_VIDEO_SOURCE_H_ |